Commit eedba17d authored by Amir MOHAMMADI

Merge branch 'dask-pipelines'

parents b744beab 2f4ec978
Pipeline #44916 failed with stages in 58 seconds
@@ -2,7 +2,6 @@ from .utils import *
from . import database
from . import algorithm
from . import tools
from . import script
from . import test
from .Algorithm import Algorithm
from .SVM import SVM
# from .SVM import SVM
from .OneClassGMM import OneClassGMM
from .OneClassGMM2 import OneClassGMM2
from .GMM import GMM
from .LogRegr import LogRegr
from .SVMCascadePCA import SVMCascadePCA
# from .SVMCascadePCA import SVMCascadePCA
from .Predictions import Predictions, VideoPredictions
from .MLP import MLP
# from .MLP import MLP
from .PadLDA import PadLDA
# to fix sphinx warnings of not able to find classes, when path is shortened
@@ -31,14 +31,14 @@ def __appropriate__(*args):
__appropriate__(
Algorithm,
SVM,
# SVM,
OneClassGMM,
OneClassGMM2,
LogRegr,
SVMCascadePCA,
# SVMCascadePCA,
Predictions,
VideoPredictions,
MLP,
# MLP,
PadLDA
)
@@ -6,8 +6,7 @@
#
import abc
import bob.bio.base
import bob.pad.base
from bob.db.base.utils import sort_files
from bob.bio.base.database import BioDatabase
@@ -83,7 +82,6 @@ class PadDatabase(BioDatabase):
"""
return []
@abc.abstractmethod
def annotations(self, file):
"""
Returns the annotations for the given File object, if available.
@@ -149,13 +147,13 @@ class PadDatabase(BioDatabase):
files : [:py:class:`bob.pad.base.database.PadFile`]
The sorted and unique list of all files of the database.
"""
realset = self.sort(self.objects(protocol=self.protocol, groups=groups, purposes='real', **self.all_files_options))
attackset = self.sort(self.objects(protocol=self.protocol, groups=groups, purposes='attack', **self.all_files_options))
realset = sort_files(self.objects(protocol=self.protocol, groups=groups, purposes='real', **self.all_files_options))
attackset = sort_files(self.objects(protocol=self.protocol, groups=groups, purposes='attack', **self.all_files_options))
if flat:
return realset + attackset
return [realset, attackset]
def training_files(self, step=None, arrange_by_client=False):
def training_files(self, step=None, arrange_by_client=False, **kwargs):
"""training_files(step = None, arrange_by_client = False) -> files
Returns all training File objects
@@ -170,4 +168,4 @@ class PadDatabase(BioDatabase):
The (arranged) list of files used for the training.
"""
return self.all_files(groups=('train',))
return self.all_files(groups=('train',), **kwargs)
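The `**kwargs` pass-through added above lets callers forward options such as `flat=True` down to `all_files`. A brief sketch, assuming a hypothetical concrete `PadDatabase` subclass called `MyPadDatabase`:

# Sketch only: MyPadDatabase is a hypothetical PadDatabase implementation.
db = MyPadDatabase(protocol="grandtest")

# Default: a nested list, [real_files, attack_files].
real_files, attack_files = db.training_files()

# With the new **kwargs pass-through, flat=True reaches all_files() and a single
# merged list of training files is returned.
train_files = db.training_files(flat=True)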
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Pavel Korshunov <pavel.korshunov@idiap.ch>
# @date: Wed May 18 10:09:22 CET 2016
#
import bob.bio.base.database
class PadFile(bob.bio.base.database.BioFile):
"""A simple base class that defines basic properties of File object for the use in PAD experiments"""
def __init__(self, client_id, path, attack_type=None, file_id=None):
"""**Constructor Documentation**
Initialize the File object with the minimum required data.
def __init__(
self,
client_id,
path,
attack_type=None,
file_id=None,
original_directory=None,
original_extension=None,
annotation_directory=None,
annotation_extension=None,
annotation_type=None,
):
"""Initialize the File object with the minimum required data.
Parameters:
@@ -22,7 +26,16 @@ class PadFile(bob.bio.base.database.BioFile):
For client_id, path and file_id, please refer to :py:class:`bob.bio.base.database.BioFile` constructor
"""
super(PadFile, self).__init__(client_id, path, file_id)
super(PadFile, self).__init__(
client_id,
path,
file_id,
original_directory=original_directory,
original_extension=original_extension,
annotation_directory=annotation_directory,
annotation_extension=annotation_extension,
annotation_type=annotation_type,
)
if attack_type is not None:
assert isinstance(attack_type, str)
@@ -30,6 +43,3 @@ class PadFile(bob.bio.base.database.BioFile):
# just copy the information
# The attack type of the sample, None if it is a genuine sample.
self.attack_type = attack_type
def __repr__(self):
return f"<File({self.id}: {self.path}, {self.client_id}, {self.attack_type})>"
from .abstract_classes import Database
from .legacy import DatabaseConnector
from .implemented import FrameContainersToFrames
from abc import ABCMeta, abstractmethod
class Database(metaclass=ABCMeta):
"""Base class for Vanilla PAD pipeline
"""
@abstractmethod
def fit_samples(self):
"""Returns :py:class:`Sample`'s to train a PAD model
Returns
-------
samples : list
List of samples for model training.
"""
pass
@abstractmethod
def predict_samples(self, group="dev"):
"""Returns :py:class:`Sample`'s to be scored.
Parameters
----------
group : :py:class:`str`, optional
Limits samples to this group
Returns
-------
samples : list
List of samples to be scored.
"""
pass
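As a minimal sketch of what an implementation might look like (the in-memory data and the module path in the import are assumptions), any object providing these two methods can feed the pipeline:

import numpy as np
import bob.pipelines as mario
# The module path below is an assumption based on the imports in this package.
from bob.pad.base.pipelines.vanilla_pad.abstract_classes import Database


class ToyDatabase(Database):
    """Hypothetical in-memory database holding random 'features'."""

    def fit_samples(self):
        return [
            mario.Sample(np.random.rand(10), key=f"train_{i}", attack_type=None)
            for i in range(4)
        ]

    def predict_samples(self, group="dev"):
        return [
            mario.Sample(
                np.random.rand(10),
                key=f"{group}_{i}",
                subject=str(i),
                attack_type=None if i % 2 == 0 else "print",
                is_bonafide=i % 2 == 0,
            )
            for i in range(4)
        ]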
from sklearn.base import TransformerMixin, BaseEstimator
import bob.pipelines as mario
import logging
logger = logging.getLogger(__name__)
class FrameContainersToFrames(TransformerMixin, BaseEstimator):
"""Expands frame containers to frame-based samples only when transform is called.
When fit_transform is called, it just converts frame containers to numpy arrays.
"""
def transform(self, video_samples):
logger.info(
f"Calling {self.__class__.__name__}.transform from FrameContainersToFrames"
)
output = []
for sample in video_samples:
for frame, frame_id in zip(sample.data, sample.data.indices):
new_sample = mario.Sample(frame, frame_id=frame_id, parent=sample)
output.append(new_sample)
return output
def fit(self, X, y=None, **fit_params):
return self
def _more_tags(self):
return {"stateless": True, "requires_fit": False}
"""Re-usable blocks for legacy bob.pad.base algorithms"""
from .abstract_classes import Database
from bob.pipelines.sample import DelayedSample
import functools
import logging
logger = logging.getLogger(__name__)
def _padfile_to_delayed_sample(padfile, database):
return DelayedSample(
load=padfile.load,
subject=str(padfile.client_id),
attack_type=padfile.attack_type,
key=padfile.path,
annotations=padfile.annotations,
is_bonafide=padfile.attack_type is None,
)
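The point of `DelayedSample` here is that I/O is deferred: metadata is attached immediately, while the underlying file is only read when `.data` is accessed. A tiny illustration with a stub loader standing in for `padfile.load`:

from bob.pipelines.sample import DelayedSample


def _stub_load():
    # In the connector below this role is played by padfile.load, which reads the file from disk.
    print("loading now")
    return [1, 2, 3]


sample = DelayedSample(load=_stub_load, key="real/c1_01", is_bonafide=True)
print(sample.key, sample.is_bonafide)  # metadata is available without any I/O
print(sample.data)                     # "loading now" is printed here, on access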
class DatabaseConnector(Database):
"""Wraps a bob.pad.base database and generates conforming samples
This connector allows wrapping generic bob.pad.base datasets and generating samples
that conform to the specifications of the PAD pipelines defined in this package.
Parameters
----------
database : object
An instantiated version of a bob.pad.base.Database object
"""
def __init__(
self, database, annotation_type="eyes-center", fixed_positions=None, **kwargs
):
super().__init__(**kwargs)
self.database = database
self.annotation_type = annotation_type
self.fixed_positions = fixed_positions
def fit_samples(self):
objects = self.database.training_files(flat=True)
return [_padfile_to_delayed_sample(k, self.database) for k in objects]
def predict_samples(self, group="dev"):
objects = self.database.all_files(groups=group, flat=True)
return [_padfile_to_delayed_sample(k, self.database) for k in objects]
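A hedged usage sketch of the connector; the legacy database is replaced by a tiny stand-in so that only the metadata path is exercised (the files themselves never exist on disk), and the import path is an assumption:

from bob.pad.base.database import PadFile
from bob.pad.base.pipelines.vanilla_pad.legacy import DatabaseConnector  # path assumed


class _ToyLegacyDatabase:
    """Hypothetical stand-in for a legacy bob.pad.base database."""

    def _files(self):
        return [
            PadFile(client_id="c1", path="real/c1_01", file_id=1),
            PadFile(client_id="c1", path="attack/c1_print_01", attack_type="print", file_id=2),
        ]

    def training_files(self, flat=True):
        return self._files()

    def all_files(self, groups="dev", flat=True):
        return self._files()


database = DatabaseConnector(_ToyLegacyDatabase())
dev_samples = database.predict_samples(group="dev")
print(dev_samples[0].key, dev_samples[0].is_bonafide)  # real/c1_01 True
# dev_samples[0].data would call PadFile.load() lazily; here the files are fictitious.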
@@ -11,7 +11,9 @@ from .error_utils import negatives_per_pai_and_positives
from functools import partial
SCORE_FORMAT = (
"Files must be 4-col format, see " ":py:func:`bob.bio.base.score.load.four_column`."
"Files must be 4-col or 5-col format, see "
":py:func:`bob.bio.base_legacy.score.load.four_column` and"
":py:func:`bob.bio.base_legacy.score.load.five_column`."
)
CRITERIA = (
"eer",
"""Executes PAD pipeline"""
import logging
import bob.pipelines as mario
import click
import joblib
from bob.extension.scripts.click_helper import ConfigCommand
from bob.extension.scripts.click_helper import ResourceOption
from bob.extension.scripts.click_helper import verbosity_option
logger = logging.getLogger(__name__)
EPILOG = """\b
Command line examples\n
-----------------------
$ bob pad vanilla-pad my_experiment.py -vv
my_experiment.py must contain the following elements:
>>> preprocessor = my_preprocessor() \n
>>> extractor = my_extractor() \n
>>> algorithm = my_algorithm() \n
>>> checkpoints = EXPLAIN CHECKPOINTING \n
\b
Look at the following example
$ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
./bob/pipelines/config/database/mobio_male.py \
./bob/pipelines/config/baselines/facecrop_pca.py
\b
TODO: Work out this help
"""
@click.command(
entry_point_group="bob.pad.config",
cls=ConfigCommand,
epilog=EPILOG,
)
@click.option(
"--pipeline",
"-p",
required=True,
entry_point_group="sklearn.pipeline",
help="Feature extraction algorithm",
cls=ResourceOption,
)
@click.option(
"--database",
"-d",
required=True,
cls=ResourceOption,
entry_point_group="bob.pad.database",
help="PAD Database connector (class that implements the methods: `fit_samples`, `predict_samples`)",
)
@click.option(
"--dask-client",
"-l",
required=False,
cls=ResourceOption,
help="Dask client for the execution of the pipeline.",
)
@click.option(
"--group",
"-g",
"groups",
type=click.Choice(["dev", "eval"]),
multiple=True,
default=("dev",),
help="If given, this value will limit the experiments belonging to a particular group",
)
@click.option(
"-o",
"--output",
show_default=True,
default="results",
help="Name of output directory",
)
@click.option(
"--checkpoint",
"-c",
is_flag=True,
help="If set, it will checkpoint all steps of the pipeline. Checkpoints will be saved in `--output`.",
cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
def vanilla_pad(pipeline, database, dask_client, groups, output, checkpoint, **kwargs):
"""Runs the simplest PAD pipeline.
This pipeline consists of three sub-pipelines.
In all of them, given raw data as input, the following steps are performed:
Sub-pipeline 1:\n
---------------
Training of the background model. Some biometric algorithms demand the training of a background model, for instance a PCA/LDA matrix or a neural network. This sub-pipeline handles that and consists of 3 steps:
\b
raw_data --> preprocessing >> feature extraction >> train background model --> background_model
\b
Sub-pipeline 2:\n
---------------
Creation of biometric references: this is a standard step in biometric pipelines.
Given a set of samples of one identity, a biometric reference (a.k.a. template) is created for that identity. This sub-pipeline handles that in 3 steps, which are the following:
\b
raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference
Note that this sub-pipeline depends on the previous one
Sub-pipeline 3:\n
---------------
Probing: this is another standard step in biometric pipelines. Given one sample and one biometric reference, a score is computed. Such a score has different meanings depending on the scoring method your biometric algorithm uses. Explaining what scoring means for different biometric algorithms is out of the scope of this help message.
raw_data --> preprocessing >> feature extraction >> probe(biometric_reference, background_model) --> score
Note that this sub-pipeline depends on the two previous ones
"""
import gzip
import os
from glob import glob
import dask.bag
os.makedirs(output, exist_ok=True)
if checkpoint:
pipeline = mario.wrap(
["checkpoint"], pipeline, features_dir=output, model_path=output
)
if dask_client is not None:
pipeline = mario.wrap(["dask"], pipeline)
# train the pipeline
fit_samples = database.fit_samples() # [::50]
pipeline = pipeline.fit(fit_samples)
for group in groups:
logger.info(f"Running vanilla biometrics for group {group}")
predict_samples = database.predict_samples(group=group) # [::50]
result = pipeline.decision_function(predict_samples)
with open(os.path.join(output, f"scores-{group}"), "w") as f:
if isinstance(result, dask.bag.core.Bag):
if dask_client is None:
logger.warning(
"`dask_client` not set. Your pipeline will run locally"
)
# write each partition into a zipped txt file
result = result.map(pad_predicted_sample_to_score_line)
prefix, postfix = f"{output}/scores/scores-{group}-", ".txt.gz"
pattern = f"{prefix}*{postfix}"
os.makedirs(os.path.dirname(prefix), exist_ok=True)
logger.info("Writing bag results into files ...")
result.to_textfiles(pattern, last_endline=True, scheduler=dask_client)
# concatenate scores into one score file
for path in sorted(
glob(pattern),
key=lambda l: int(l.replace(prefix, "").replace(postfix, "")),
):
with gzip.open(path, "rt") as f2:
f.write(f2.read())
else:
for sample in result:
f.write(pad_predicted_sample_to_score_line(sample, endl="\n"))
def pad_predicted_sample_to_score_line(sample, endl=""):
claimed_id, test_label, score = sample.subject, sample.key, sample.data
# # use the model_label field to indicate frame number
# model_label = None
# if hasattr(sample, "frame_id"):
# model_label = sample.frame_id
real_id = claimed_id if sample.is_bonafide else sample.attack_type
return f"{claimed_id} {real_id} {test_label} {score}{endl}"
# return f"{claimed_id} {model_label} {real_id} {test_label} {score}{endl}"
@@ -4,6 +4,7 @@
import numpy as np
import bob.bio.video
from bob.io.base import vstack_features
import bob.pipelines as mario
import itertools
@@ -84,7 +85,7 @@ def convert_list_of_frame_cont_to_array(frame_containers):
return x.as_array()
return x
features_array = vstack_features(reader, frame_containers)
features_array = mario.utils.vstack_features(reader, frame_containers)
return features_array
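The stacking helper now comes from bob.pipelines; a small sketch of its reader/items contract with plain arrays, assuming it behaves like the bob.io.base version it replaces:

import numpy as np
import bob.pipelines as mario

items = [np.ones((2, 4)), np.zeros((3, 4))]
# The reader is applied to each item; here it is the identity since items are already arrays.
features = mario.utils.vstack_features(lambda x: x, items)
print(features.shape)  # (5, 4): the rows of all items stacked vertically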
@@ -31,6 +31,21 @@ requirements:
- bob.learn.linear
- bob.learn.libsvm
- bob.learn.mlp
- bob.learn.activation
- bob.learn.em
- bob.measure
- bob.sp
- bob.ip.base
- bob.ip.color
- bob.ip.gabor
- bob.measure
- bob.db.base
- bob.db.atnt
- bob.ip.facedetect
- bob.io.video
- matplotlib {{ matplotlib }}
- six {{ six }}
- scipy {{ scipy }}
- scikit-learn {{ scikit_learn }}
run:
- python
@@ -41,7 +56,7 @@ test:
imports:
- {{ name }}
commands:
- spoof.py --help
- bob pad --help
- nosetests --with-coverage --cover-package={{ name }} -sv {{ name }}
- sphinx-build -aEW {{ project_dir }}/doc {{ project_dir }}/sphinx
- sphinx-build -aEb doctest {{ project_dir }}/doc sphinx
@@ -10,15 +10,12 @@ eggs = bob.extension
bob.sp
bob.ip.gabor
bob.ip.base
bob.ip.draw
bob.ip.color
bob.measure
bob.learn.boosting
bob.io.video
bob.learn.activation
bob.io.matlab
bob.io.image
bob.ip.flandmark
bob.ip.facedetect
bob.learn.linear
bob.learn.em
@@ -41,15 +38,12 @@ develop = src/bob.extension
src/bob.sp
src/bob.ip.gabor
src/bob.ip.base
src/bob.ip.draw
src/bob.ip.color
src/bob.measure
src/bob.learn.boosting
src/bob.io.video
src/bob.learn.activation
src/bob.io.matlab
src/bob.io.image
src/bob.ip.flandmark
src/bob.ip.facedetect
src/bob.learn.linear
src/bob.learn.em
@@ -75,15 +69,12 @@ bob.io.base = git git@gitlab.idiap.ch:bob/bob.io.base
bob.sp = git git@gitlab.idiap.ch:bob/bob.sp
bob.ip.gabor = git git@gitlab.idiap.ch:bob/bob.ip.gabor
bob.ip.base = git git@gitlab.idiap.ch:bob/bob.ip.base
bob.ip.draw = git git@gitlab.idiap.ch:bob/bob.ip.draw
bob.ip.color = git git@gitlab.idiap.ch:bob/bob.ip.color
bob.measure = git git@gitlab.idiap.ch:bob/bob.measure
bob.learn.boosting = git git@gitlab.idiap.ch:bob/bob.learn.boosting
bob.io.video = git git@gitlab.idiap.ch:bob/bob.io.video
bob.learn.activation = git git@gitlab.idiap.ch:bob/bob.learn.activation
bob.io.matlab = git git@gitlab.idiap.ch:bob/bob.io.matlab
bob.io.image = git git@gitlab.idiap.ch:bob/bob.io.image
bob.ip.flandmark = git git@gitlab.idiap.ch:bob/bob.ip.flandmark
bob.ip.facedetect = git git@gitlab.idiap.ch:bob/bob.ip.facedetect
bob.learn.linear = git git@gitlab.idiap.ch:bob/bob.learn.linear
bob.learn.em = git git@gitlab.idiap.ch:bob/bob.learn.em
@@ -133,6 +133,7 @@ setup(
# main entry for bob pad cli
'bob.cli': [
'bio = bob.bio.base_legacy.script.bio:bio',
'pad = bob.pad.base.script.pad:pad',
'vulnerability = bob.pad.base.script.vulnerability:vulnerability',
],
@@ -149,6 +150,7 @@ setup(
'evaluate = bob.pad.base.script.pad_commands:evaluate',
'cross = bob.pad.base.script.cross:cross',
'finalize-scores = bob.pad.base.script.finalize_scores:finalize_scores',
'vanilla-pad = bob.pad.base.script.vanilla_pad:vanilla_pad',
],
# bob vuln scripts