Commit 8e4452fe authored by Amir MOHAMMADI's avatar Amir MOHAMMADI

Merge branch 'dask-pipelines' into 'master'

Remove deprecated code

See merge request !77
parents 49ee5285 ee6a0cad
Pipeline #45294 failed with stages
in 3 minutes and 20 seconds
......@@ -15,12 +15,13 @@
Scripts to run anti-spoofing experiments
========================================
This package is part of the signal-processing and machine learning toolbox
Bob_. This package is the base of ``bob.pad`` family of packages, which allow to run comparable and reproducible
presentation attack detection (PAD) experiments on publicly available databases.
This package is part of the signal-processing and machine learning toolbox Bob_.
This package is the base of ``bob.pad`` family of packages, which allow to run
comparable and reproducible presentation attack detection (PAD) experiments on
publicly available databases.
This package contains basic functionality to run PAD experiments.
It provides a generic ``./bin/spoof.py`` script that takes several parameters, including:
It provides a generic API for PAD including:
* A database and its evaluation protocol
* A data preprocessing algorithm
......@@ -28,9 +29,11 @@ It provides a generic ``./bin/spoof.py`` script that takes several parameters, i
* A PAD algorithm
All these steps of the PAD system are given as configuration files.
All the algorithms are standardized on top of scikit-learn estimators.
In this base class implementation, only a core functionality is implemented. The specialized algorithms should
be provided by other packages, which are usually in the ``bob.pad`` namespace, like a ``bob.pad.voice`` package.
In this base package, only a core functionality is implemented. The specialized
algorithms should be provided by other packages, which are usually in the
``bob.pad`` namespace, like ``bob.pad.face``.
Installation
------------
......@@ -50,5 +53,5 @@ development `mailing list`_.
.. Place your references here:
.. _bob: https://www.idiap.ch/software/bob
.. _installation: https://gitlab.idiap.ch/bob/bob/wikis/Installation
.. _installation: https://www.idiap.ch/software/bob/install
.. _mailing list: https://groups.google.com/forum/?fromgroups#!forum/bob-devel
from .utils import *
from . import database
from . import algorithm
from . import script
from . import test
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Pavel Korshunov <pavel.korshunov@idiap.ch>
# @date: Wed 19 Aug 13:43:21 2015
#
import numpy
import os
from bob.bio.base import utils
class Algorithm(object):
    """This is the base class for all anti-spoofing (PAD) algorithms.
    It defines the minimum requirements for all derived algorithm classes.

    Call the constructor in derived class implementations.
    If your derived algorithm performs feature projection, please register this here.
    If it needs training for the projector, please set this here, too.

    **Parameters:**

    performs_projection : bool
      Set to ``True`` if your derived algorithm performs a projection.
      Also implement the :py:meth:`project` function, and the :py:meth:`load_projector` if necessary.

    requires_projector_training : bool
      Only valid, when ``performs_projection = True``.
      Set this flag to ``False``, when the projection is applied, but the projector does not need to be trained.

    kwargs : ``key=value`` pairs
      A list of keyword arguments to be written in the `__str__` function.
    """

    def __init__(self,
                 performs_projection=False,  # enable if your tool will project the features
                 requires_projector_training=True,  # by default, the projector needs training, if projection is enabled
                 **kwargs):  # parameters from the derived class that should be reported in the __str__() function
        self.performs_projection = performs_projection
        # projector training can only be required when a projection is performed at all
        self.requires_projector_training = performs_projection and requires_projector_training
        self._kwargs = kwargs

    def __str__(self):
        """__str__() -> info

        This function returns all parameters of this class (and its derived class).

        **Returns:**

        info : str
          A string containing the full information of all parameters of this (and the derived) class.
        """
        # only report parameters that were actually set (skip None values)
        return "%s(%s)" % (str(self.__class__), ", ".join(
            "%s=%s" % (key, value)
            for key, value in self._kwargs.items() if value is not None))

    def project(self, feature):
        """project(feature) -> projected

        This function will project the given feature.
        It must be overwritten by derived classes, as soon as ``performs_projection = True`` was set in the constructor.
        It is assured that the :py:meth:`load_projector` was called once before the ``project`` function is executed.

        **Parameters:**

        feature : object
          The feature to be projected.

        **Returns:**

        projected : object
          The projected features.
          Must be writable with the :py:meth:`write_feature` function and readable with the :py:meth:`read_feature` function.
        """
        raise NotImplementedError("Please overwrite this function in your derived class")

    def score(self, toscore):
        """score(toscore) -> score

        This function will compute the score for the given object ``toscore``.
        It must be overwritten by derived classes.

        **Parameters:**

        toscore : object
          The object to compute the score for. This will be the output of
          extractor if performs_projection is False, otherwise this will be the
          output of project method of the algorithm.

        **Returns:**

        score : float
          A score value for the object ``toscore``.
        """
        raise NotImplementedError("Please overwrite this function in your derived class")

    def score_for_multiple_projections(self, toscore):
        """score_for_multiple_projections(toscore) -> score

        This function will compute the score for a list of objects in ``toscore``.
        It must be overwritten by derived classes.

        **Parameters:**

        toscore : [object]
          A list of objects to compute the score for.

        **Returns:**

        score : float
          A score value for the object ``toscore``.
        """
        raise NotImplementedError("Please overwrite this function in your derived class")

    ############################################################
    ### Special functions that might be overwritten on need
    ############################################################

    def write_feature(self, feature, feature_file):
        """Saves the given *projected* feature to a file with the given name.

        In this base class implementation, the feature is written by
        delegating to :py:func:`bob.bio.base.utils.save`.
        If you have a different format, please overwrite this function.

        Please register ``performs_projection = True`` in the constructor to enable this function.

        **Parameters:**

        feature : object
          A feature as returned by the :py:meth:`project` function, which should be written.

        feature_file : str or :py:class:`bob.io.base.HDF5File`
          The file open for writing, or the file name to write to.
        """
        utils.save(feature, feature_file)

    def read_feature(self, feature_file):
        """read_feature(feature_file) -> feature

        Reads the *projected* feature from file.
        In this base class implementation, it delegates to :py:func:`bob.bio.base.utils.load`.
        If you have a different format, please overwrite this function.

        Please register ``performs_projection = True`` in the constructor to enable this function.

        **Parameters:**

        feature_file : str or :py:class:`bob.io.base.HDF5File`
          The file open for reading, or the file name to read from.

        **Returns:**

        feature : object
          The feature that was read from file.
        """
        return utils.load(feature_file)

    def train_projector(self, training_features, projector_file):
        """This function can be overwritten to train the feature projector.
        If you do this, please also register the function by calling this base class constructor
        and enabling the training by ``requires_projector_training = True``.

        **Parameters:**

        training_features : [object] or [[object]]
          A list of *extracted* features that can be used for training the projector.
          Features will be provided in a single list

        projector_file : str
          The file to write.
          This file should be readable with the :py:meth:`load_projector` function.
        """
        raise NotImplementedError(
            "Please overwrite this function in your derived class, or unset the 'requires_projector_training' option in the constructor.")

    def load_projector(self, projector_file):
        """Loads the parameters required for feature projection from file.
        This function usually is useful in combination with the :py:meth:`train_projector` function.
        In this base class implementation, it does nothing.

        Please register ``performs_projection = True`` in the constructor to enable this function.

        **Parameters:**

        projector_file : str
          The file to read the projector from.
        """
        pass
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
import numpy
from bob.pad.base.algorithm import Algorithm
import bob.learn.mlp
import bob.io.base
import logging
logger = logging.getLogger(__name__)
class MLP(Algorithm):
    """Interfaces an MLP classifier used for PAD

    Attributes
    ----------
    hidden_units : :py:obj:`tuple` of :any:`int`
        The number of hidden units in each hidden layer
    max_iter : :any:`int`
        The maximum number of training iterations
    precision : :any:`float`
        criterion to stop the training: if the difference
        between current and last loss is smaller than
        this number, then stop training.
    """

    def __init__(self, hidden_units=(10, 10), max_iter=1000, precision=0.001, **kwargs):
        """Init function

        Parameters
        ----------
        hidden_units : :py:obj:`tuple` of int
            The number of hidden units in each hidden layer
        max_iter : int
            The maximum number of training iterations
        precision : float
            criterion to stop the training: if the difference
            between current and last loss is smaller than
            this number, then stop training.
        """
        Algorithm.__init__(self,
                           performs_projection=True,
                           requires_projector_training=True,
                           **kwargs)
        self.hidden_units = hidden_units
        self.max_iter = max_iter
        self.precision = precision
        self.mlp = None

    def train_projector(self, training_features, projector_file):
        """Trains the MLP

        Parameters
        ----------
        training_features : :any:`list` of :py:class:`numpy.ndarray`
            Data used to train the MLP. The real attempts are in
            training_features[0] and the attacks are in training_features[1]
        projector_file : str
            Filename where to save the trained model.
        """
        # training is done in batch (i.e. using all training data)
        batch_size = len(training_features[0]) + len(training_features[1])

        # One-hot labels: output unit 0 fires for real samples, unit 1 for attacks.
        label_real = numpy.zeros((len(training_features[0]), 2), dtype='float64')
        label_real[:, 0] = 1
        label_attack = numpy.zeros((len(training_features[1]), 2), dtype='float64')
        # BUGFIX: the attack unit must be set to 1; it was previously assigned 0,
        # which left the attack labels all-zero and made training meaningless.
        label_attack[:, 1] = 1

        real = numpy.array(training_features[0])
        attack = numpy.array(training_features[1])
        X = numpy.vstack([real, attack])
        Y = numpy.vstack([label_real, label_attack])

        # Building MLP architecture: input layer, hidden layers, output layer.
        input_dim = real.shape[1]
        shape = [input_dim]
        shape.extend(self.hidden_units)
        # last layer contains two units: one for each class (i.e. real and attack)
        shape.append(2)
        shape = tuple(shape)

        self.mlp = bob.learn.mlp.Machine(shape)
        # NOTE(review): ``bob.learn.activation`` is not imported explicitly at
        # the top of this file; it is presumably made available by importing
        # ``bob.learn.mlp`` — confirm.
        self.mlp.output_activation = bob.learn.activation.Logistic()
        self.mlp.randomize()
        trainer = bob.learn.mlp.BackProp(
            batch_size,
            bob.learn.mlp.CrossEntropyLoss(self.mlp.output_activation),
            self.mlp,
            train_biases=True)

        # Iterate until the maximum number of iterations is reached or the cost
        # changes by less than ``precision`` between two iterations.
        n_iter = 0
        previous_cost = 0
        current_cost = 1
        while (n_iter < self.max_iter) and (abs(previous_cost - current_cost) > self.precision):
            previous_cost = current_cost
            trainer.train(self.mlp, X, Y)
            current_cost = trainer.cost(self.mlp, X, Y)
            n_iter += 1
            # reuse current_cost instead of recomputing trainer.cost for logging
            logger.debug("Iteration %d -> cost = %s (previous = %s, max_iter = %d)",
                         n_iter, current_cost, previous_cost, self.max_iter)

        f = bob.io.base.HDF5File(projector_file, 'w')
        self.mlp.save(f)

    def project(self, feature):
        """Project the given feature

        Parameters
        ----------
        feature : :py:class:`numpy.ndarray`
            The feature to classify

        Returns
        -------
        numpy.ndarray
            The value of the two units in the last layer of the MLP.
        """
        return self.mlp(feature)

    def score(self, toscore):
        """Returns the probability of the real class.

        Parameters
        ----------
        toscore : :py:class:`numpy.ndarray`
            Output of the MLP: either a single (2,) prediction or a stack of
            per-frame predictions.

        Returns
        -------
        list of float
            probability of the authentication attempt to be real.
        """
        if toscore.ndim == 1:
            return [toscore[0]]
        else:
            # CONSISTENCY FIX: wrap the mean in a list so both branches (and the
            # other PAD algorithms in this package) return a list of scores.
            return [numpy.mean(toscore[:, 0])]
This diff is collapsed.
# -*- coding: utf-8 -*-
# @author: Amir Mohammadi
from bob.pad.base.algorithm import Algorithm
from bob.pad.base.utils import convert_and_prepare_features
from bob.bio.gmm.algorithm import GMM
import logging
import numpy as np
from collections.abc import Iterable
from multiprocessing import cpu_count
import joblib
logger = logging.getLogger(__name__)
def bic(trainer, machine, X):
    """Bayesian information criterion for the current model on the input X.

    Parameters
    ----------
    trainer : object
        GMM trainer; ``trainer.compute_likelihood(machine)`` must return the
        (per-sample) log-likelihood of the training data under ``machine``.
    machine : object
        Trained GMM machine exposing ``means``, ``variances`` and ``weights``.
    X : array of shape (n_samples, n_dimensions)
        Only the number of samples is used here.

    Returns
    -------
    bic : float
        The lower the better.
    """
    avg_log_likelihood = trainer.compute_likelihood(machine)
    n_samples = X.shape[0]
    # Free parameters: every mean and variance entry, plus the mixture
    # weights (which sum to one, hence the -1).
    n_free_parameters = (
        machine.means.size
        + machine.variances.size
        + len(machine.weights)
        - 1
    )
    return -2 * avg_log_likelihood * n_samples + n_free_parameters * np.log(n_samples)
class OneClassGMM2(Algorithm):
    """A one class GMM implementation based on Bob's GMM implementation which is more
    stable than scikit-learn's one.

    Only the bona-fide (real) class is modeled; the projected value of a probe
    under this model is used as the PAD score.

    Parameters
    ----------
    number_of_gaussians : int or iterable of int
        The number of Gaussian components. If an iterable is given, a grid
        search is performed and the model with the lowest BIC is kept.
    preprocessor : object, optional
        A scikit-learn style transformer (e.g. PCA) fitted on the real samples
        and applied before GMM training and projection.
    """

    def __init__(
        self,
        # parameters for the GMM
        number_of_gaussians,
        # parameters of UBM training
        kmeans_training_iterations=25,  # Maximum number of iterations for K-Means
        gmm_training_iterations=25,  # Maximum number of iterations for ML GMM Training
        training_threshold=5e-4,  # Threshold to end the ML training
        variance_threshold=5e-4,  # Minimum value that a variance can reach
        update_weights=True,
        update_means=True,
        update_variances=True,
        n_threads=cpu_count(),
        preprocessor=None,  # a scikit learn preprocessor, can be PCA for example
        **kwargs
    ):
        # projection is performed and requires training unless the caller overrides
        kwargs.setdefault("performs_projection", True)
        kwargs.setdefault("requires_projector_training", True)
        super().__init__(**kwargs)
        self.gmm_alg = GMM(
            number_of_gaussians=number_of_gaussians,
            kmeans_training_iterations=kmeans_training_iterations,
            gmm_training_iterations=gmm_training_iterations,
            training_threshold=training_threshold,
            variance_threshold=variance_threshold,
            update_weights=update_weights,
            update_means=update_means,
            update_variances=update_variances,
            n_threads=n_threads,
        )
        self.number_of_gaussians = number_of_gaussians
        self.preprocessor = preprocessor

    def train_projector(self, training_features, projector_file):
        """Trains the one-class GMM on the real samples only.

        Note: ``training_features`` is emptied element by element to release
        memory as early as possible — the caller's list is mutated.
        """
        del training_features[1]
        real = convert_and_prepare_features(training_features[0], dtype="float64")
        del training_features[0]

        if self.preprocessor is not None:
            real = self.preprocessor.fit_transform(real)
            joblib.dump(self.preprocessor, projector_file + ".pkl")

        if isinstance(self.number_of_gaussians, Iterable):
            logger.info(
                "Performing grid search for GMM on number_of_gaussians: %s",
                self.number_of_gaussians,
            )
            # MODERNIZATION: ``np.infty`` was removed in NumPy 2.0; ``np.inf``
            # is the canonical spelling.
            lowest_bic = np.inf
            best_n_gaussians = None
            for nc in self.number_of_gaussians:
                logger.info("Testing for number_of_gaussians: %s", nc)
                self.gmm_alg.gaussians = nc
                self.gmm_alg.train_ubm(real)
                bic_ = bic(self.gmm_alg.ubm_trainer, self.gmm_alg.ubm, real)
                logger.info("BIC for number_of_gaussians: %s is %s", nc, bic_)
                if bic_ < lowest_bic:
                    # NOTE(review): keeping a reference to the best UBM assumes
                    # train_ubm creates a fresh machine on each call; if it
                    # retrains in place, the last model would be saved — confirm.
                    gmm = self.gmm_alg.ubm
                    lowest_bic = bic_
                    best_n_gaussians = nc
                    logger.info("Best parameters so far: number_of_gaussians %s", nc)

            assert best_n_gaussians is not None
            self.gmm_alg.gaussians = best_n_gaussians
        else:
            self.gmm_alg.train_ubm(real)
            gmm = self.gmm_alg.ubm

        self.gmm_alg.ubm = gmm
        self.gmm_alg.save_ubm(projector_file)

    def load_projector(self, projector_file):
        """Loads the trained GMM (and the fitted preprocessor, if any)."""
        self.gmm_alg.load_ubm(projector_file)
        if self.preprocessor is not None:
            self.preprocessor = joblib.load(projector_file + ".pkl")

    def project(self, feature):
        """Evaluates ``feature`` under the trained GMM (presumably its
        log-likelihood — see the UBM's __call__ semantics)."""
        feature = convert_and_prepare_features([feature], dtype="float64")[0]
        if self.preprocessor is not None:
            feature = self.preprocessor.transform(feature)
        return self.gmm_alg.ubm(feature)

    def score(self, toscore):
        """The projected value already is the score; wrap it in a list."""
        return [toscore]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
import numpy
from bob.bio.base.algorithm import LDA
class PadLDA(LDA):
    """Wrapper for bob.bio.base.algorithm.LDA,

    Here, LDA is used in a PAD context. This means that the feature
    will be projected on a single dimension subspace, which acts as a score

    For more details, you may want to have a look at
    `bob.learn.linear Documentation`_

    .. _bob.learn.linear Documentation:
       https://www.idiap.ch/software/bob/docs/bob/bob.learn.linear/stable/index.html

    Attributes
    ----------
    lda_subspace_dimension : int
        the dimension of the LDA subspace. In the PAD case, the default
        value is *always* used, and corresponds to the number of classes
        in the training set (i.e. 2).
    pca_subspace_dimension : int
        The dimension of the PCA subspace to be applied
        before on the data, before applying LDA.
    use_pinv : bool
        Use the pseudo-inverse in LDA computation.
    """

    def __init__(self,
                 lda_subspace_dimension=None,  # defaults to the number of classes in the training set
                 pca_subspace_dimension=None,  # optional PCA truncation before LDA (int or float)
                 use_pinv=False,
                 **kwargs):
        """Init function

        Parameters
        ----------
        lda_subspace_dimension : int
            the dimension of the LDA subspace. In the PAD case, the default
            value is *always* used, and corresponds to the number of classes
            in the training set (i.e. 2).
        pca_subspace_dimension : int
            The dimension of the PCA subspace to be applied
            before on the data, before applying LDA.
        use_pinv : bool
            Use the pseudo-inverse in LDA computation.
        """
        super(PadLDA, self).__init__(
            lda_subspace_dimension=lda_subspace_dimension,
            pca_subspace_dimension=pca_subspace_dimension,
            use_pinv=use_pinv,
            **kwargs,
        )

    def score(self, toscore):
        """Returns the first projected coordinate as the PAD score."""
        return [toscore[0]]
from bob.pad.base.algorithm import Algorithm
import numpy
class Predictions(Algorithm):
    """An algorithm that takes the precomputed predictions and uses them for
    scoring."""

    def __init__(self, **kwargs):
        super(Predictions, self).__init__(**kwargs)

    def score(self, predictions):
        scores = numpy.asarray(predictions)
        # a single value is the output of a sigmoid binary layer
        if scores.size == 1:
            return scores
        # otherwise, assume the output of a softmax layer and return the
        # value of its second unit
        return [scores[1]]
class VideoPredictions(Algorithm):
    """An algorithm that takes the precomputed predictions and uses them for
    scoring."""

    def __init__(self, axis=1, frame_level_scoring=False, **kwargs):
        super(VideoPredictions, self).__init__(**kwargs)
        self.frame_level_scoring = frame_level_scoring
        self.axis = axis

    def score(self, predictions):
        # Assuming the predictions are the output of a softmax layer
        if len(predictions) == 0:
            return [float("nan")]

        frame_scores = predictions.as_array()[:, self.axis]
        if self.frame_level_scoring:
            # one score per frame
            return frame_scores
        # a single score: the average over all frames
        return [numpy.mean(frame_scores)]
This diff is collapsed.
This diff is collapsed.
from .Algorithm import Algorithm
# from .SVM import SVM
from .OneClassGMM import OneClassGMM
from .OneClassGMM2 import OneClassGMM2
from .GMM import GMM
from .LogRegr import LogRegr
# from .SVMCascadePCA import SVMCascadePCA
from .Predictions import Predictions, VideoPredictions
# from .MLP import MLP
from .PadLDA import PadLDA
# to fix sphinx warnings of not able to find classes, when path is shortened
def __appropriate__(*args):
"""Says object was actually declared here, and not in the import module.
Fixing sphinx warnings of not being able to find classes, when path is
shortened.
Parameters
----------
*args
The objects that you want sphinx to believe that are defined here.
Resolves `Sphinx referencing issues <https//github.com/sphinx-
doc/sphinx/issues/3048>`
"""
for obj in args:
obj.__module__ = __name__
# Re-home the imported classes in this module so Sphinx can resolve the
# shortened documentation paths (see __appropriate__ above).
__appropriate__(
    Algorithm,
    # SVM,
    OneClassGMM,
    OneClassGMM2,
    LogRegr,
    # SVMCascadePCA,
    Predictions,
    VideoPredictions,
    # MLP,
    PadLDA
)

# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
import bob.bio.base

# Grid configuration resource with demanding (high-memory) queues for each
# stage of the PAD toolchain.
# NOTE(review): the string values look like SGE memory requests — confirm
# against the bob.bio.base.grid.Grid documentation.

# define a queue with demanding parameters
grid = bob.bio.base.grid.Grid(
    number_of_scoring_jobs=1,
    number_of_enrollment_jobs=1,
    # projector training
    training_queue='32G',
    # preprocessing
    preprocessing_queue='4G',
    # feature extraction
    extraction_queue='8G',
    # feature projection
    projection_queue='8G',
    # model enrollment
    enrollment_queue='8G',
    # scoring
    scoring_queue='8G'
)
from sklearn.base import TransformerMixin, BaseEstimator
import bob.pipelines as mario
import logging
logger = logging.getLogger(__name__)
class FrameContainersToFrames(TransformerMixin, BaseEstimator):
"""Expands frame containers to frame-based samples only when transform is called.
When fit_transform is called, it just converts frame containers to numpy arrays.
"""
def transform(self, video_samples):
logger.info(
f"Calling {self.__class__.__name__}.transform from FrameContainersToFrames"
)
output = []
for sample in video_samples: