diff git a/bob/pad/base/algorithm/LogRegr.py b/bob/pad/base/algorithm/LogRegr.py
new file mode 100644
index 0000000000000000000000000000000000000000..da1eb4e025740cb351c9f007fb738311e622240f
 /dev/null
+++ b/bob/pad/base/algorithm/LogRegr.py
@@ 0,0 +1,437 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Aug 25 09:29:02 2017
+
+@author: Olegs Nikisins
+"""
+
+#==============================================================================
+# Import what is needed here:
+
+from bob.pad.base.algorithm import Algorithm
+
+from bob.bio.video.utils import FrameContainer
+
+import numpy as np
+
+from sklearn import linear_model
+
+import bob.io.base
+
+from bob.pad.base.utils import convert_frame_cont_to_array, convert_list_of_frame_cont_to_array, mean_std_normalize, \
+ norm_train_data
+
+
+#==============================================================================
+# Main body :
+
+
+class LogRegr(Algorithm):
+ """
+ This class is designed to train Logistic Regression classifier given Frame Containers
+ with features of real and attack classes. The procedure is the following:
+
+    1. First, the input data is mean-std normalized using mean and std of the
+       real class only.
+
+ 2. Second, the Logistic Regression classifier is trained on normalized
+ input features.
+
+ 3. The input features are next classified using pretrained LR machine.
+
+ **Parameters:**
+
+ ``C`` : :py:class:`float`
+ Inverse of regularization strength in LR classifier; must be a positive.
+ Like in support vector machines, smaller values specify stronger
+ regularization. Default: 1.0 .
+
+ ``frame_level_scores_flag`` : :py:class:`bool`
+ Return scores for each frame individually if True. Otherwise, return a
+ single score per video. Default: ``False``.
+
+ ``subsample_train_data_flag`` : :py:class:`bool`
+ Uniformly subsample the training data if ``True``. Default: ``False``.
+
+    ``subsampling_step`` : :py:class:`int`
+        Training data subsampling step, only valid if
+        ``subsample_train_data_flag = True``. Default: 10 .
+
+ ``subsample_videos_flag`` : :py:class:`bool`
+ Uniformly subsample the training videos if ``True``. Default: ``False``.
+
+    ``video_subsampling_step`` : :py:class:`int`
+        Training videos subsampling step, only valid if
+        ``subsample_videos_flag = True``. Default: 3 .
+ """
+
+    def __init__(self,
+                 C=1,
+                 frame_level_scores_flag=False,
+                 subsample_train_data_flag=False,
+                 subsampling_step=10,
+                 subsample_videos_flag=False,
+                 video_subsampling_step=3):
+        """
+        Store the algorithm settings and forward them to the constructor of
+        the ``Algorithm`` base class.
+
+        The meaning of each argument is described in the class docstring.
+        """
+
+        # All user-facing settings, collected once so that exactly the same
+        # values are handed to the base class and stored on the instance.
+        settings = dict(
+            C=C,
+            frame_level_scores_flag=frame_level_scores_flag,
+            subsample_train_data_flag=subsample_train_data_flag,
+            subsampling_step=subsampling_step,
+            subsample_videos_flag=subsample_videos_flag,
+            video_subsampling_step=video_subsampling_step)
+
+        # The two flags declare that this algorithm projects features and
+        # that its projector has to be trained first.
+        Algorithm.__init__(
+            self,
+            performs_projection=True,
+            requires_projector_training=True,
+            **settings)
+
+        for name, value in settings.items():
+            setattr(self, name, value)
+
+        # Filled in by ``load_projector``:
+        self.lr_machine = None  # pretrained LR machine
+        self.features_mean = None  # mean of the features
+        self.features_std = None  # std of the features
+
+        # names of the attributes of the LR machine which are saved/loaded
+        # to/from the HDF5 file:
+        self.lr_param_keys = ["C", "classes_", "coef_", "intercept_"]
+
+
+ #==========================================================================
+    def train_lr(self, real, attack, C):
+        """
+        Fit a Logistic Regression classifier on the real and attack samples.
+        Both classes are normalized with ``norm_train_data`` before training.
+
+        **Parameters:**
+
+        ``real`` : 2D :py:class:`numpy.ndarray`
+            Training features for the real class.
+
+        ``attack`` : 2D :py:class:`numpy.ndarray`
+            Training features for the attack class.
+
+        ``C`` : :py:class:`float`
+            Inverse of regularization strength in LR classifier; must be
+            positive. Like in support vector machines, smaller values specify
+            stronger regularization.
+
+        **Returns:**
+
+        ``machine`` : object
+            A trained LR machine.
+
+        ``features_mean`` : 1D :py:class:`numpy.ndarray`
+            Mean of the features.
+
+        ``features_std`` : 1D :py:class:`numpy.ndarray`
+            Standard deviation of the features.
+        """
+
+        real_norm, attack_norm, features_mean, features_std = norm_train_data(
+            real, attack)
+
+        # Real samples first, attack samples second ...
+        samples = np.vstack([real_norm, attack_norm])
+
+        # ... labelled 0 (real) and 1 (attack) respectively.
+        labels = np.hstack(
+            [np.zeros(len(real_norm)),
+             np.ones(len(attack_norm))])
+
+        machine = linear_model.LogisticRegression(C=C)
+        machine.fit(samples, labels)
+
+        return machine, features_mean, features_std
+
+ #==========================================================================
+    def save_lr_machine_and_mean_std(self, projector_file, machine,
+                                     features_mean, features_std):
+        """
+        Write the LR machine parameters and the normalizers (features mean
+        and std) to the HDF5 file ``projector_file``.
+
+        **Parameters:**
+
+        ``projector_file`` : :py:class:`str`
+            Absolute name of the file to save the data to, as returned by
+            ``bob.pad.base`` framework.
+
+        ``machine`` : object
+            The LR machine to be saved. As returned by sklearn.linear_model
+            module.
+
+        ``features_mean`` : 1D :py:class:`numpy.ndarray`
+            Mean of the features.
+
+        ``features_std`` : 1D :py:class:`numpy.ndarray`
+            Standard deviation of the features.
+        """
+
+        hdf5_file = bob.io.base.HDF5File(projector_file, 'w')
+
+        # One dataset per machine attribute listed in ``self.lr_param_keys``:
+        for param_name in self.lr_param_keys:
+            hdf5_file.set(param_name, getattr(machine, param_name))
+
+        hdf5_file.set("features_mean", features_mean)
+        hdf5_file.set("features_std", features_std)
+
+        # Drop the only reference to the file object.
+        # NOTE(review): presumably this is what closes the file - confirm
+        # against the bob.io.base documentation.
+        del hdf5_file
+
+ #==========================================================================
+    def subsample_train_videos(self, training_features, step):
+        """
+        Uniformly select a subset of frame containers from the input list:
+        the elements at positions ``0, step, 2 * step, ...`` are kept.
+
+        **Parameters:**
+
+        ``training_features`` : [FrameContainer]
+            A list of FrameContainers
+
+        ``step`` : :py:class:`int`
+            Data selection step.
+
+        **Returns:**
+
+        ``training_features_subset`` : [FrameContainer]
+            A list with selected FrameContainers
+        """
+
+        training_features_subset = []
+
+        for position in range(0, len(training_features), step):
+            training_features_subset.append(training_features[position])
+
+        return training_features_subset
+
+ #==========================================================================
+    def train_projector(self, training_features, projector_file):
+        """
+        Train LR for feature projection and save it to the file.
+        The ``requires_projector_training = True`` flag must be set to True
+        to enable this function.
+
+        **Parameters:**
+
+        ``training_features`` : [[FrameContainer], [FrameContainer]]
+            A list containing two elements: [0] - a list of Frame Containers
+            with feature vectors for the real class; [1] - a list of Frame
+            Containers with feature vectors for the attack class.
+
+        ``projector_file`` : :py:class:`str`
+            The file to save the trained projector to, as returned by the
+            ``bob.pad.base`` framework.
+        """
+
+        # training_features[0] - training features for the REAL class.
+        real = self._prepare_class_features(training_features[0])
+
+        # training_features[1] - training features for the ATTACK class.
+        attack = self._prepare_class_features(training_features[1])
+
+        # Train the LR machine and get normalizers:
+        machine, features_mean, features_std = self.train_lr(
+            real=real, attack=attack, C=self.C)
+
+        # Save the LR machine and normalizers:
+        self.save_lr_machine_and_mean_std(projector_file, machine,
+                                          features_mean, features_std)
+
+    #==========================================================================
+    def _prepare_class_features(self, frame_containers):
+        """
+        Convert the Frame Containers of one class to a 2D feature array,
+        applying the optional video and sample subsampling.
+
+        **Parameters:**
+
+        ``frame_containers`` : [FrameContainer]
+            Frame Containers with feature vectors of a single class.
+
+        **Returns:**
+
+        ``features`` : 2D :py:class:`numpy.ndarray`
+            Feature array of the class, as returned by
+            ``convert_list_of_frame_cont_to_array`` and optionally subsampled
+            along the first axis.
+        """
+
+        if self.subsample_videos_flag:  # keep every n-th video only
+
+            frame_containers = self.subsample_train_videos(
+                frame_containers, self.video_subsampling_step)
+
+        features = convert_list_of_frame_cont_to_array(
+            frame_containers)  # output is array
+
+        if self.subsample_train_data_flag:  # keep every n-th sample only
+
+            # for a positive step this selects the same rows as indexing
+            # with range(0, len(features), step)
+            features = features[::self.subsampling_step, :]
+
+        return features
+
+ #==========================================================================
+    def load_lr_machine_and_mean_std(self, projector_file):
+        """
+        Read the LR machine parameters and the normalizers (features mean
+        and std) from the HDF5 file ``projector_file``.
+
+        **Parameters:**
+
+        ``projector_file`` : :py:class:`str`
+            Absolute name of the file to load the trained projector from, as
+            returned by ``bob.pad.base`` framework.
+
+        **Returns:**
+
+        ``machine`` : object
+            The loaded LR machine. As returned by sklearn.linear_model module.
+
+        ``features_mean`` : 1D :py:class:`numpy.ndarray`
+            Mean of the features.
+
+        ``features_std`` : 1D :py:class:`numpy.ndarray`
+            Standard deviation of the features.
+        """
+
+        hdf5_file = bob.io.base.HDF5File(projector_file, 'r')
+
+        # Start from an untrained machine and restore every attribute listed
+        # in ``self.lr_param_keys`` from the file:
+        machine = linear_model.LogisticRegression()
+
+        for param_name in self.lr_param_keys:
+            setattr(machine, param_name, hdf5_file.read(param_name))
+
+        features_mean = hdf5_file.read("features_mean")
+        features_std = hdf5_file.read("features_std")
+
+        # Drop the only reference to the file object.
+        # NOTE(review): presumably this is what closes the file - confirm
+        # against the bob.io.base documentation.
+        del hdf5_file
+
+        return machine, features_mean, features_std
+
+ #==========================================================================
+    def load_projector(self, projector_file):
+        """
+        Load the LR machine, features mean and std from the HDF5 file and
+        store them in ``self.lr_machine``, ``self.features_mean`` and
+        ``self.features_std``.
+
+        The file is read with :py:meth:`load_lr_machine_and_mean_std`, so it
+        must have the layout written by :py:meth:`train_projector`.
+
+        Please register `performs_projection = True` in the constructor to
+        enable this function.
+
+        **Parameters:**
+
+        ``projector_file`` : :py:class:`str`
+            The file to read the projector from, as returned by the
+            ``bob.pad.base`` framework.
+        """
+
+        (self.lr_machine,
+         self.features_mean,
+         self.features_std) = self.load_lr_machine_and_mean_std(projector_file)
+
+ #==========================================================================
+    def project(self, feature):
+        """
+        Compute a vector of scores, one per sample of the input features.
+        The following steps are applied:
+
+        1. The input data is mean-std normalized using ``self.features_mean``
+           and ``self.features_std``.
+
+        2. The normalized features are classified with ``self.lr_machine``.
+
+        Set ``performs_projection = True`` in the constructor to enable this
+        function. :py:meth:`load_projector` must have been called before,
+        since it sets the machine and the normalizers used here.
+
+        **Parameters:**
+
+        ``feature`` : FrameContainer or 2D :py:class:`numpy.ndarray`
+            Two types of inputs are accepted.
+            A Frame Container containing the features of an individual,
+            see ``bob.bio.video.utils.FrameContainer``.
+            Or a 2D feature array of the size (N_samples x N_features).
+
+        **Returns:**
+
+        ``scores`` : 1D :py:class:`numpy.ndarray`
+            Vector of scores. Scores for the real class are expected to be
+            higher, than the scores of the negative / attack class.
+            In this case scores are probabilities.
+        """
+
+        # Frame Containers are converted to a 2D array, anything else is
+        # used as is.
+        features_array = (convert_frame_cont_to_array(feature)
+                          if isinstance(feature, FrameContainer) else feature)
+
+        normalized, _mean, _std = mean_std_normalize(
+            features_array, self.features_mean, self.features_std)
+
+        # Column 0 of the probability matrix is returned; ``train_lr``
+        # labels the real class with 0.
+        probabilities = self.lr_machine.predict_proba(normalized)
+
+        return probabilities[:, 0]
+
+ #==========================================================================
+    def score(self, toscore):
+        """
+        Turn the per-frame scores of a video into the final score list.
+
+        **Parameters:**
+
+        ``toscore`` : 1D :py:class:`numpy.ndarray`
+            Vector with scores for each frame/sample defining the probability
+            of the frame being a sample of the real class.
+
+        **Returns:**
+
+        ``score`` : [:py:class:`float`]
+            If ``frame_level_scores_flag = False`` a single score is returned:
+            the mean of ``toscore``, one score per video. It is placed into a
+            list, because the ``score`` must be an iterable.
+            If ``frame_level_scores_flag = True`` a list of scores is
+            returned, one score per frame/sample.
+        """
+
+        if not self.frame_level_scores_flag:
+
+            return [np.mean(toscore)]  # a single score per video
+
+        return list(toscore)  # one score per frame
diff git a/bob/pad/base/algorithm/SVM.py b/bob/pad/base/algorithm/SVM.py
index c81e3ff013f3b3529549d31119420cdf532c8a23..3c1f356f82dc3fd58690144230a8a96006918ffb 100644
 a/bob/pad/base/algorithm/SVM.py
+++ b/bob/pad/base/algorithm/SVM.py
@@ 23,7 +23,8 @@ import bob.io.base
import os
from bob.pad.base.utils import convert_frame_cont_to_array, convert_and_prepare_features, combinations, \
 select_uniform_data_subset, select_quasi_uniform_data_subset
+ select_uniform_data_subset, select_quasi_uniform_data_subset, mean_std_normalize, split_data_to_train_cv, \
+ norm_train_cv_data, prepare_data_for_hyper_param_grid_search
# ==============================================================================
# Main body :
@@ 120,93 +121,6 @@ class SVM(Algorithm):
self.n_train_samples = n_train_samples
self.machine = None
 # ==========================================================================
 def split_data_to_train_cv(self, features):
 """
 This function is designed to split the input array of features into two
 subset namely train and crossvalidation. These subsets can be used to tune the
 hyperparameters of the SVM. The splitting is 50/50, the first half of the
 samples in the input are selected to be train set, and the second half of
 samples is crossvalidation.

 **Parameters:**

 ``features`` : 2D :py:class:`numpy.ndarray`
 Input array with feature vectors. The rows are samples, columns are features.

 **Returns:**

 ``features_train`` : 2D :py:class:`numpy.ndarray`
 Selected subset of train features.

 ``features_cv`` : 2D :py:class:`numpy.ndarray`
 Selected subset of crossvalidation features.
 """

 half_samples_num = np.int(features.shape[0] / 2)

 features_train = features[0:half_samples_num, :]
 features_cv = features[half_samples_num:2 * half_samples_num + 1, :]

 return features_train, features_cv

 # ==========================================================================
 def prepare_data_for_hyper_param_grid_search(self, training_features,
 n_samples):
 """
 This function converts a list of all training features returned by ``read_features``
 method of the extractor to the subsampled train and crossvalidation arrays for both
 real and attack classes.

 **Parameters:**

 ``training_features`` : [[FrameContainer], [FrameContainer]]
 A list containing two elements: [0]  a list of Frame Containers with
 feature vectors for the real class; [1]  a list of Frame Containers with
 feature vectors for the attack class.

 ``n_samples`` : :py:class:`int`
 Number of uniformly selected feature vectors per class.

 **Returns:**

 ``real_train`` : 2D :py:class:`numpy.ndarray`
 Selected subset of train features for the real class.
 The number of samples in this set is n_samples/2, which is defined
 by split_data_to_train_cv method of this class.

 ``real_cv`` : 2D :py:class:`numpy.ndarray`
 Selected subset of crossvalidation features for the real class.
 The number of samples in this set is n_samples/2, which is defined
 by split_data_to_train_cv method of this class.

 ``attack_train`` : 2D :py:class:`numpy.ndarray`
 Selected subset of train features for the attack class.
 The number of samples in this set is n_samples/2, which is defined
 by split_data_to_train_cv method of this class.

 ``attack_cv`` : 2D :py:class:`numpy.ndarray`
 Selected subset of crossvalidation features for the attack class.
 The number of samples in this set is n_samples/2, which is defined
 by split_data_to_train_cv method of this class.
 """

 # training_features[0]  training features for the REAL class.
 real = convert_and_prepare_features(
 training_features[0]) # output is array
 # training_features[1]  training features for the ATTACK class.
 attack = convert_and_prepare_features(
 training_features[1]) # output is array

 # uniformly select subsets of features:
 real_subset = select_uniform_data_subset(real, n_samples)
 attack_subset = select_uniform_data_subset(attack, n_samples)

 # split the data into train and crossvalidation:
 real_train, real_cv = self.split_data_to_train_cv(real_subset)
 attack_train, attack_cv = self.split_data_to_train_cv(attack_subset)

 return real_train, real_cv, attack_train, attack_cv
# ==========================================================================
def comp_prediction_precision(self, machine, real, attack):
@@ 242,138 +156,6 @@ class SVM(Algorithm):
return precision
 # ==========================================================================
 def mean_std_normalize(self,
 features,
 features_mean=None,
 features_std=None):
 """
 The features in the input 2D array are meanstd normalized.
 The rows are samples, the columns are features. If ``features_mean``
 and ``features_std`` are provided, then these vectors will be used for
 normalization. Otherwise, the mean and std of the features is
 computed on the fly.

 **Parameters:**

 ``features`` : 2D :py:class:`numpy.ndarray`
 Array of features to be normalized.

 ``features_mean`` : 1D :py:class:`numpy.ndarray`
 Mean of the features. Default: None.

 ``features_std`` : 2D :py:class:`numpy.ndarray`
 Standart deviation of the features. Default: None.

 **Returns:**

 ``features_norm`` : 2D :py:class:`numpy.ndarray`
 Normalized array of features.

 ``features_mean`` : 1D :py:class:`numpy.ndarray`
 Mean of the features.

 ``features_std`` : 2D :py:class:`numpy.ndarray`
 Standart deviation of the features.
 """

 features = np.copy(features)

 # Compute mean and std if not given:
 if features_mean is None:
 features_mean = np.mean(features, axis=0)

 features_std = np.std(features, axis=0)

 row_norm_list = []

 for row in features: # row is a sample

 row_norm = (row  features_mean) / features_std

 row_norm_list.append(row_norm)

 features_norm = np.vstack(row_norm_list)

 return features_norm, features_mean, features_std

 # ==========================================================================
 def norm_train_cv_data(self,
 real_train,
 real_cv,
 attack_train,
 attack_cv,
 one_class_flag=False):
 """
 Meanstd normalization of train and crossvalidation data arrays.

 **Parameters:**

 ``real_train`` : 2D :py:class:`numpy.ndarray`
 Subset of train features for the real class.

 ``real_cv`` : 2D :py:class:`numpy.ndarray`
 Subset of crossvalidation features for the real class.

 ``attack_train`` : 2D :py:class:`numpy.ndarray`
 Subset of train features for the attack class.

 ``attack_cv`` : 2D :py:class:`numpy.ndarray`
 Subset of crossvalidation features for the attack class.

 ``one_class_flag`` : :py:class:`bool`
 If set to ``True``, only positive/real samples will be used to
 compute the mean and std normalization vectors. Set to ``True`` if
 using oneclass SVM. Default: False.

 **Returns:**

 ``real_train_norm`` : 2D :py:class:`numpy.ndarray`
 Normalized subset of train features for the real class.

 ``real_cv_norm`` : 2D :py:class:`numpy.ndarray`
 Normalized subset of crossvalidation features for the real class.

 ``attack_train_norm`` : 2D :py:class:`numpy.ndarray`
 Normalized subset of train features for the attack class.

 ``attack_cv_norm`` : 2D :py:class:`numpy.ndarray`
 Normalized subset of crossvalidation features for the attack class.
 """
 if not (one_class_flag):

 features_train = np.vstack([real_train, attack_train])

 features_train_norm, features_mean, features_std = self.mean_std_normalize(
 features_train)

 real_train_norm = features_train_norm[0:real_train.shape[0], :]

 attack_train_norm = features_train_norm[real_train.shape[0]:, :]

 real_cv_norm, _, _ = self.mean_std_normalize(
 real_cv, features_mean, features_std)

 attack_cv_norm, _, _ = self.mean_std_normalize(
 attack_cv, features_mean, features_std)

 else: # oneclass SVM case

 # only real class used for training in one class SVM:
 real_train_norm, features_mean, features_std = self.mean_std_normalize(
 real_train)

 attack_train_norm, _, _ = self.mean_std_normalize(
 attack_train, features_mean, features_std)

 real_cv_norm, _, _ = self.mean_std_normalize(
 real_cv, features_mean, features_std)

 attack_cv_norm, _, _ = self.mean_std_normalize(
 attack_cv, features_mean, features_std)

 return real_train_norm, real_cv_norm, attack_train_norm, attack_cv_norm

# ==========================================================================
def train_svm(
self,
@@ 453,12 +235,12 @@ class SVM(Algorithm):
machine_type == 'ONE_CLASS') # True if oneclass SVM is used
# get the data for the hyperparameter gridsearch:
 real_train, real_cv, attack_train, attack_cv = self.prepare_data_for_hyper_param_grid_search(
 training_features, n_samples)
+ real_train, real_cv, attack_train, attack_cv = \
+ prepare_data_for_hyper_param_grid_search(training_features, n_samples)
if mean_std_norm_flag:
# normalize the data:
 real_train, real_cv, attack_train, attack_cv = self.norm_train_cv_data(
+ real_train, real_cv, attack_train, attack_cv = norm_train_cv_data(
real_train, real_cv, attack_train, attack_cv, one_class_flag)
precisions_cv = [
@@ 554,7 +336,7 @@ class SVM(Algorithm):
if not (one_class_flag): # twoclass SVM case
features = np.vstack([real, attack])
 features_norm, features_mean, features_std = self.mean_std_normalize(
+ features_norm, features_mean, features_std = mean_std_normalize(
features)
real = features_norm[0:real.shape[
0], :] # The array is now normalized
@@ 563,9 +345,9 @@ class SVM(Algorithm):
else: # oneclass SVM case
 real, features_mean, features_std = self.mean_std_normalize(
+ real, features_mean, features_std = mean_std_normalize(
real) # use only real class to compute normalizers
 attack = self.mean_std_normalize(attack, features_mean,
+ attack, _, _ = mean_std_normalize(attack, features_mean,
features_std)
# ``real`` and ``attack`` arrays are now normalizaed
diff git a/bob/pad/base/algorithm/__init__.py b/bob/pad/base/algorithm/__init__.py
index c146db024bbacc42756ed7946ad8659736735083..db4ab762f253563a27b38d6410e91c0b133d53aa 100644
 a/bob/pad/base/algorithm/__init__.py
+++ b/bob/pad/base/algorithm/__init__.py
@@ 1,6 +1,7 @@
from .Algorithm import Algorithm
from .SVM import SVM
from OneClassGMM import OneClassGMM
+from .LogRegr import LogRegr
def __appropriate__(*args):
"""Says object was actually declared here, and not in the import module.
@@ 24,5 +25,6 @@ __appropriate__(
Algorithm,
SVM,
OneClassGMM,
+ LogRegr,
)
__all__ = [_ for _ in dir() if not _.startswith('_')]
diff git a/bob/pad/base/utils/helper_functions.py b/bob/pad/base/utils/helper_functions.py
index cc371c23b9ff060a04b26a6398bf6fe43dc6589a..3087e4b10f221537a643183ecea06e3144b03b66 100644
 a/bob/pad/base/utils/helper_functions.py
+++ b/bob/pad/base/utils/helper_functions.py
@@ 278,4 +278,204 @@ def mean_std_normalize(features,
features_norm = np.vstack(row_norm_list)
 return features_norm, features_mean, features_std
\ No newline at end of file
+ return features_norm, features_mean, features_std
+
+
+def norm_train_data(real, attack):
+    """
+    Mean-std normalization of the input data arrays. The mean and std
+    normalizers are computed using the real class only and then applied to
+    both classes.
+
+    **Parameters:**
+
+    ``real`` : 2D :py:class:`numpy.ndarray`
+        Training features for the real class.
+
+    ``attack`` : 2D :py:class:`numpy.ndarray`
+        Training features for the attack class.
+
+    **Returns:**
+
+    ``real_norm`` : 2D :py:class:`numpy.ndarray`
+        Mean-std normalized training features for the real class.
+
+    ``attack_norm`` : 2D :py:class:`numpy.ndarray`
+        Mean-std normalized training features for the attack class.
+
+    ``features_mean`` : 1D :py:class:`numpy.ndarray`
+        Mean of the features.
+
+    ``features_std`` : 1D :py:class:`numpy.ndarray`
+        Standard deviation of the features.
+    """
+
+    # No normalizers are passed here, so they are computed from ``real``:
+    real_norm, features_mean, features_std = mean_std_normalize(real)
+
+    # The attack class reuses the normalizers of the real class:
+    attack_norm, _mean, _std = mean_std_normalize(
+        attack, features_mean, features_std)
+
+    return real_norm, attack_norm, features_mean, features_std
+
+
+def split_data_to_train_cv(features):
+    """
+    This function is designed to split the input array of features into two
+    subsets, namely train and cross-validation. These subsets can be used to
+    tune the hyper-parameters of a classifier. The first half of the samples
+    in the input is selected to be the train set, and the remaining samples
+    are the cross-validation set. For an odd number of samples the
+    cross-validation set holds one sample more than the train set.
+
+    **Parameters:**
+
+    ``features`` : 2D :py:class:`numpy.ndarray`
+        Input array with feature vectors. The rows are samples, columns are
+        features.
+
+    **Returns:**
+
+    ``features_train`` : 2D :py:class:`numpy.ndarray`
+        Selected subset of train features.
+
+    ``features_cv`` : 2D :py:class:`numpy.ndarray`
+        Selected subset of cross-validation features.
+    """
+
+    # Floor division replaces ``np.int(n / 2)``: the ``np.int`` alias was
+    # removed from NumPy, and for a non-negative n both give the same value.
+    half_samples_num = features.shape[0] // 2
+
+    features_train = features[0:half_samples_num, :]
+
+    # The upper bound is clipped to the number of rows by the slicing, so
+    # this takes every sample which is not in the train set.
+    features_cv = features[half_samples_num:2 * half_samples_num + 1, :]
+
+    return features_train, features_cv
+
+
+def norm_train_cv_data(real_train,
+                       real_cv,
+                       attack_train,
+                       attack_cv,
+                       one_class_flag=False):
+    """
+    Mean-std normalization of train and cross-validation data arrays.
+    The normalizers are computed on the train data only and then applied to
+    the cross-validation data.
+
+    **Parameters:**
+
+    ``real_train`` : 2D :py:class:`numpy.ndarray`
+        Subset of train features for the real class.
+
+    ``real_cv`` : 2D :py:class:`numpy.ndarray`
+        Subset of cross-validation features for the real class.
+
+    ``attack_train`` : 2D :py:class:`numpy.ndarray`
+        Subset of train features for the attack class.
+
+    ``attack_cv`` : 2D :py:class:`numpy.ndarray`
+        Subset of cross-validation features for the attack class.
+
+    ``one_class_flag`` : :py:class:`bool`
+        If set to ``True``, only positive/real samples will be used to
+        compute the mean and std normalization vectors. Set to ``True`` if
+        using a one-class classifier. Default: False.
+
+    **Returns:**
+
+    ``real_train_norm`` : 2D :py:class:`numpy.ndarray`
+        Normalized subset of train features for the real class.
+
+    ``real_cv_norm`` : 2D :py:class:`numpy.ndarray`
+        Normalized subset of cross-validation features for the real class.
+
+    ``attack_train_norm`` : 2D :py:class:`numpy.ndarray`
+        Normalized subset of train features for the attack class.
+
+    ``attack_cv_norm`` : 2D :py:class:`numpy.ndarray`
+        Normalized subset of cross-validation features for the attack class.
+    """
+
+    if one_class_flag:  # one-class classifier case
+
+        # normalizers are computed from the real train samples only:
+        real_train_norm, features_mean, features_std = mean_std_normalize(
+            real_train)
+
+        attack_train_norm, _, _ = mean_std_normalize(
+            attack_train, features_mean, features_std)
+
+    else:  # two-class case
+
+        # normalizers are computed from the real and attack train samples
+        # stacked together:
+        stacked_norm, features_mean, features_std = mean_std_normalize(
+            np.vstack([real_train, attack_train]))
+
+        # split the stacked array back into the two classes:
+        n_real = real_train.shape[0]
+
+        real_train_norm = stacked_norm[0:n_real, :]
+
+        attack_train_norm = stacked_norm[n_real:, :]
+
+    # In both cases the cross-validation data reuses the train normalizers:
+    real_cv_norm, _, _ = mean_std_normalize(real_cv, features_mean,
+                                            features_std)
+
+    attack_cv_norm, _, _ = mean_std_normalize(attack_cv, features_mean,
+                                              features_std)
+
+    return real_train_norm, real_cv_norm, attack_train_norm, attack_cv_norm
+
+
+def prepare_data_for_hyper_param_grid_search(training_features, n_samples):
+    """
+    This function converts a list of all training features returned by
+    ``read_features`` method of the extractor to the subsampled train and
+    cross-validation arrays for both real and attack classes.
+
+    Each class is converted with ``convert_and_prepare_features``, reduced
+    with ``select_uniform_data_subset(..., n_samples)`` and finally split with
+    ``split_data_to_train_cv``.
+
+    **Parameters:**
+
+    ``training_features`` : [[FrameContainer], [FrameContainer]]
+        A list containing two elements: [0] - a list of Frame Containers with
+        feature vectors for the real class; [1] - a list of Frame Containers
+        with feature vectors for the attack class.
+
+    ``n_samples`` : :py:class:`int`
+        Number of uniformly selected feature vectors per class.
+
+    **Returns:**
+
+    ``real_train`` : 2D :py:class:`numpy.ndarray`
+        Selected subset of train features for the real class, as returned by
+        ``split_data_to_train_cv``.
+
+    ``real_cv`` : 2D :py:class:`numpy.ndarray`
+        Selected subset of cross-validation features for the real class, as
+        returned by ``split_data_to_train_cv``.
+
+    ``attack_train`` : 2D :py:class:`numpy.ndarray`
+        Selected subset of train features for the attack class, as returned
+        by ``split_data_to_train_cv``.
+
+    ``attack_cv`` : 2D :py:class:`numpy.ndarray`
+        Selected subset of cross-validation features for the attack class, as
+        returned by ``split_data_to_train_cv``.
+    """
+
+    # Frame Containers -> arrays; [0] is the REAL class, [1] the ATTACK class
+    real_array = convert_and_prepare_features(training_features[0])
+    attack_array = convert_and_prepare_features(training_features[1])
+
+    # uniformly select ``n_samples`` feature vectors per class:
+    real_selected = select_uniform_data_subset(real_array, n_samples)
+    attack_selected = select_uniform_data_subset(attack_array, n_samples)
+
+    # split each class into train and cross-validation parts:
+    real_train, real_cv = split_data_to_train_cv(real_selected)
+    attack_train, attack_cv = split_data_to_train_cv(attack_selected)
+
+    return real_train, real_cv, attack_train, attack_cv
\ No newline at end of file
diff git a/conda/meta.yaml b/conda/meta.yaml
index 852c823f16a4043296f9c7b8c50f860222a6f312..564d295d112f5c05e8c5c3658279b240a54c7c87 100644
 a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ 52,6 +52,7 @@ test:
 coverage
 sphinx
 sphinx_rtd_theme
+  scikit-learn
about:
home: https://www.idiap.ch/software/bob/