Skip to content
Snippets Groups Projects
Commit 1274fe70 authored by Amir MOHAMMADI
Browse files

Merge branch 'add-new-classifiers' into 'master'

Add new classification algorithms

See merge request !50
parents 5d13471a cbf1f1c5
No related branches found
No related tags found
1 merge request: !50 Add new classification algorithms
Pipeline #
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
import numpy
from bob.pad.base.algorithm import Algorithm
import bob.learn.mlp
import bob.io.base
from bob.bio.video.utils import FrameContainer
from bob.pad.base.utils import convert_frame_cont_to_array
from bob.core.log import setup
# Module-level logger, obtained from bob.core.log.setup under the "bob.pad.base" name.
logger = setup("bob.pad.base")
class MLP(Algorithm):
    """Interfaces an MLP classifier used for PAD

    Attributes
    ----------
    hidden_units : :py:obj:`tuple` of :any:`int`
        The number of hidden units in each hidden layer
    max_iter : :any:`int`
        The maximum number of training iterations
    precision : :any:`float`
        Criterion to stop the training: if the difference
        between current and last loss is smaller than
        this number, then stop training.
    mlp
        The underlying ``bob.learn.mlp.Machine``. It is ``None`` until
        :py:meth:`train_projector` has been called.
    """

    def __init__(self, hidden_units=(10, 10), max_iter=1000, precision=0.001, **kwargs):
        """Init function

        Parameters
        ----------
        hidden_units : :py:obj:`tuple` of int
            The number of hidden units in each hidden layer
        max_iter : int
            The maximum number of training iterations
        precision : float
            Criterion to stop the training: if the difference
            between current and last loss is smaller than
            this number, then stop training.
        **kwargs
            Forwarded unchanged to ``Algorithm.__init__``.
        """
        Algorithm.__init__(self,
                           performs_projection=True,
                           requires_projector_training=True,
                           **kwargs)

        self.hidden_units = hidden_units
        self.max_iter = max_iter
        self.precision = precision
        self.mlp = None

    def train_projector(self, training_features, projector_file):
        """Trains the MLP

        The network has one input per feature dimension, the configured
        hidden layers, and two output units (index 0: real, index 1: attack).
        Training runs in full-batch mode until either ``max_iter`` iterations
        were performed or the cost changes by no more than ``precision``
        between two consecutive iterations.

        Parameters
        ----------
        training_features : :any:`list` of :py:class:`numpy.ndarray`
            Data used to train the MLP. The real attempts are in
            training_features[0] and the attacks are in training_features[1]
        projector_file : str
            Filename where to save the trained model.
        """
        real_features = training_features[0]
        attack_features = training_features[1]

        # training is done in batch (i.e. using all training data)
        batch_size = len(real_features) + len(attack_features)

        # One-hot targets: column 0 flags "real", column 1 flags "attack".
        label_real = numpy.zeros((len(real_features), 2), dtype='float64')
        label_real[:, 0] = 1
        label_attack = numpy.zeros((len(attack_features), 2), dtype='float64')
        # Was "= 0" on an already-zero array, which left attacks with the
        # all-zero target [0, 0] instead of [0, 1].
        label_attack[:, 1] = 1

        real = numpy.array(real_features)
        attack = numpy.array(attack_features)
        X = numpy.vstack([real, attack])
        Y = numpy.vstack([label_real, label_attack])

        # Building MLP architecture: input layer, hidden layers, and a last
        # layer with two units, one for each class (i.e. real and attack)
        input_dim = real.shape[1]
        shape = (input_dim,) + tuple(self.hidden_units) + (2,)

        self.mlp = bob.learn.mlp.Machine(shape)
        self.mlp.output_activation = bob.learn.activation.Logistic()
        self.mlp.randomize()
        trainer = bob.learn.mlp.BackProp(
            batch_size,
            bob.learn.mlp.CrossEntropyLoss(self.mlp.output_activation),
            self.mlp,
            train_biases=True)

        n_iter = 0
        # Initial values only need to differ by more than ``precision`` so
        # that the loop is entered for any sensible ``precision``.
        previous_cost = 0
        current_cost = 1
        while (n_iter < self.max_iter) and (abs(previous_cost - current_cost) > self.precision):
            previous_cost = current_cost
            trainer.train(self.mlp, X, Y)
            current_cost = trainer.cost(self.mlp, X, Y)
            n_iter += 1
            # Reuse the cost computed above rather than evaluating it again.
            logger.debug("Iteration {} -> cost = {} (previous = {}, max_iter = {})".format(n_iter, current_cost, previous_cost, self.max_iter))

        f = bob.io.base.HDF5File(projector_file, 'w')
        self.mlp.save(f)

    def project(self, feature):
        """Project the given feature

        Parameters
        ----------
        feature : :py:class:`numpy.ndarray`
            The feature to classify

        Returns
        -------
        numpy.ndarray
            The value of the two units in the last layer of the MLP.
        """
        # NOTE: FrameContainer inputs are not converted here; the feature is
        # handed to the machine as-is.
        return self.mlp(feature)

    def score(self, toscore):
        """Returns the probability of the real class.

        Parameters
        ----------
        toscore : :py:class:`numpy.ndarray`
            Output of :py:meth:`project`: either a single 1D projection or a
            2D array with one projection per row.

        Returns
        -------
        float
            Probability of the authentication attempt to be real. For a 1D
            input this is a one-element list holding the first output unit;
            for a 2D input it is the mean of the first column as a scalar.
        """
        # NOTE(review): the two branches return different container types
        # (list vs. scalar); kept as-is because callers may rely on it.
        if toscore.ndim == 1:
            return [toscore[0]]
        return numpy.mean([toscore[:, 0]])
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
import numpy
from bob.bio.base.algorithm import LDA
class PadLDA(LDA):
    """Thin wrapper around bob.bio.base.algorithm.LDA for PAD experiments.

    In a PAD context the feature is projected by LDA onto a
    single-dimension subspace, and that projection acts as the score.

    For more details, you may want to have a look at
    `bob.learn.linear Documentation`_

    .. _bob.learn.linear Documentation:
      https://www.idiap.ch/software/bob/docs/bob/bob.learn.linear/stable/index.html

    Attributes
    ----------
    lda_subspace_dimension : int
        The dimension of the LDA subspace. In the PAD case the default
        value is meant to be used, so the parent class derives it from the
        number of classes in the training set (i.e. 2).
    pca_subspace_dimension : int
        The dimension of the PCA subspace applied to the data
        before LDA.
    use_pinv : bool
        Use the pseudo-inverse in LDA computation.
    """

    def __init__(self,
                 lda_subspace_dimension=None,
                 pca_subspace_dimension=None,
                 use_pinv=False,
                 **kwargs):
        """Init function

        Parameters
        ----------
        lda_subspace_dimension : int
            If set, the LDA subspace is truncated to the given number of
            dimensions; by default it is limited by the number of classes
            in the training set.
        pca_subspace_dimension : int
            If set, a PCA subspace truncation is performed before applying
            LDA; might be integral or float.
        use_pinv : bool
            Use the pseudo-inverse in LDA computation.
        **kwargs
            Passed on to the parent ``LDA`` constructor.
        """
        # Gather everything for the parent constructor in one mapping.
        parent_arguments = dict(
            lda_subspace_dimension=lda_subspace_dimension,
            pca_subspace_dimension=pca_subspace_dimension,
            use_pinv=use_pinv,
            **kwargs)
        super(PadLDA, self).__init__(**parent_arguments)

    def read_toscore_object(self, toscore_object_file):
        """Reads the toscore_object feature from a file.

        The toscore_object is identical to the projected feature, so this
        simply delegates to ``read_feature``.

        Parameters
        ----------
        toscore_object_file : str or :py:class:`bob.io.base.HDF5File`
            The file open for reading, or the file name to read from.

        Returns
        -------
        object:
            The toscore_object that was read from file.
        """
        projected = self.read_feature(toscore_object_file)
        return projected

    def score(self, toscore):
        """Returns the first element of ``toscore`` in a one-element list.

        Parameters
        ----------
        toscore
            The projected feature; only its element at index 0 is used.

        Returns
        -------
        list
            ``[toscore[0]]``
        """
        first_component = toscore[0]
        return [first_component]
......@@ -5,6 +5,8 @@ from .LogRegr import LogRegr
from .SVMCascadePCA import SVMCascadePCA
from .Predictions import Predictions
from .MLP import MLP
from .PadLDA import PadLDA
# to fix sphinx warnings of not able to find classes, when path is shortened
def __appropriate__(*args):
......@@ -32,6 +34,8 @@ __appropriate__(
LogRegr,
SVMCascadePCA,
Predictions,
MLP,
PadLDA
)
# gets sphinx autodoc done right - don't remove it
......
......@@ -12,6 +12,8 @@ import bob.pad.base
from bob.pad.base.algorithm import SVM
from bob.pad.base.algorithm import OneClassGMM
from bob.pad.base.algorithm import MLP
from bob.pad.base.algorithm import PadLDA
import random
......@@ -173,3 +175,61 @@ def test_convert_list_of_frame_cont_to_array():
assert isinstance(features_array[0], np.ndarray)
features_fm = convert_array_to_list_of_frame_cont(real_array)
assert isinstance(features_fm[0], bob.bio.video.FrameContainer)
def test_MLP():
    """
    Test the MLP PAD algorithm.

    Two well-separated 2D Gaussian clouds are generated (real around 1,
    attack around 5). After training, a real sample must activate the
    "real" output unit (index 0) more than the "attack" one (index 1).
    """
    # Local imports: only this test needs them.
    import os
    import shutil
    import tempfile

    random.seed(7)

    N = 20000
    mu = 1
    sigma = 1
    real_array = np.transpose(
        np.vstack([[random.gauss(mu, sigma) for _ in range(N)],
                   [random.gauss(mu, sigma) for _ in range(N)]]))

    mu = 5
    sigma = 1
    attack_array = np.transpose(
        np.vstack([[random.gauss(mu, sigma) for _ in range(N)],
                   [random.gauss(mu, sigma) for _ in range(N)]]))

    training_features = [real_array, attack_array]

    mlp = MLP(max_iter=100)

    # Write the model into a private temporary directory instead of a
    # hard-coded /tmp path, and always clean it up afterwards.
    temp_dir = tempfile.mkdtemp(prefix='bob_pad_base_test_')
    try:
        mlp.train_projector(training_features,
                            os.path.join(temp_dir, 'mlp.hdf5'))
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

    real_sample = real_array[0]
    prob = mlp.project(real_sample)
    assert prob[0] > prob[1]
def test_LDA():
    """
    Test the LDA PAD algorithm.

    Two 2D Gaussian clouds are generated (real around 1, attack around 5)
    and the trained LDA machine is expected to map 2 inputs to 1 output.
    """
    # Local imports: only this test needs them.
    import os
    import shutil
    import tempfile

    random.seed(7)

    N = 20000
    mu = 1
    sigma = 1
    real_array = np.transpose(
        np.vstack([[random.gauss(mu, sigma) for _ in range(N)],
                   [random.gauss(mu, sigma) for _ in range(N)]]))

    mu = 5
    sigma = 1
    attack_array = np.transpose(
        np.vstack([[random.gauss(mu, sigma) for _ in range(N)],
                   [random.gauss(mu, sigma) for _ in range(N)]]))

    training_features = [real_array, attack_array]

    lda = PadLDA()

    # Write the model into a private temporary directory instead of a
    # hard-coded /tmp path, and always clean it up afterwards.
    temp_dir = tempfile.mkdtemp(prefix='bob_pad_base_test_')
    try:
        lda.train_projector(training_features,
                            os.path.join(temp_dir, 'lda.hdf5'))
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

    assert lda.machine.shape == (2, 1)
......@@ -29,6 +29,7 @@ requirements:
- bob.bio.video
- bob.learn.linear
- bob.learn.libsvm
- bob.learn.mlp
- scikit-learn
run:
- python
......
......@@ -28,6 +28,7 @@ Algorithm
.. autosummary::
bob.pad.base.tools.train_projector
bob.pad.base.tools.project
bob.pad.base.algorithm
Scoring
~~~~~~~
......
......@@ -7,4 +7,5 @@ bob.bio.base
bob.bio.video
bob.learn.libsvm
bob.learn.linear
scikit-learn
\ No newline at end of file
bob.learn.mlp
scikit-learn
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment