From 69a14c63b81767cc45918b15371d5e36f93a9179 Mon Sep 17 00:00:00 2001 From: Pavel Korshunov <pavel.korshunov@idiap.ch> Date: Tue, 13 Feb 2018 11:38:34 +0100 Subject: [PATCH] moved SVM algorithm from bob.pad.face --- bob/pad/base/algorithm/SVM.py | 781 +++++++++++++++++++++++++ bob/pad/base/test/test_algorithms.py | 83 +++ bob/pad/base/utils/__init__.py | 3 + bob/pad/base/utils/helper_functions.py | 226 +++++++ 4 files changed, 1093 insertions(+) create mode 100644 bob/pad/base/algorithm/SVM.py create mode 100644 bob/pad/base/test/test_algorithms.py create mode 100644 bob/pad/base/utils/__init__.py create mode 100644 bob/pad/base/utils/helper_functions.py diff --git a/bob/pad/base/algorithm/SVM.py b/bob/pad/base/algorithm/SVM.py new file mode 100644 index 0000000..c81e3ff --- /dev/null +++ b/bob/pad/base/algorithm/SVM.py @@ -0,0 +1,781 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Created on Wed May 17 09:43:09 2017 + +@author: Olegs Nikisins +""" + +# ============================================================================== +# Import what is needed here: + +from bob.pad.base.algorithm import Algorithm +from bob.bio.video.utils import FrameContainer + +import itertools as it + +import numpy as np + +import bob.learn.libsvm + +import bob.io.base + +import os + +from bob.pad.base.utils import convert_frame_cont_to_array, convert_and_prepare_features, combinations, \ + select_uniform_data_subset, select_quasi_uniform_data_subset + +# ============================================================================== +# Main body : + + +class SVM(Algorithm): + """ + This class is designed to train SVM given features (either numpy arrays or Frame Containers) + from real and attack classes. The trained SVM is then used to classify the + testing data as either real or attack. The SVM is trained in two stages. + First, the best parameters for SVM are estimated using train and + cross-validation subsets. 
The size of the subsets used in hyper-parameter + tuning is defined by ``n_samples`` parameter of this class. Once best + parameters are determined, the SVM machine is trained using complete training + set. + + **Parameters:** + + ``machine_type`` : :py:class:`str` + A type of the SVM machine. Please check ``bob.learn.libsvm`` for + more details. Default: 'C_SVC'. + + ``kernel_type`` : :py:class:`str` + A type of kerenel for the SVM machine. Please check ``bob.learn.libsvm`` + for more details. Default: 'RBF'. + + ``n_samples`` : :py:class:`int` + Number of uniformly selected feature vectors per class defining the + sizes of sub-sets used in the hyper-parameter grid search. + + ``trainer_grid_search_params`` : :py:class:`dict` + Dictionary containing the hyper-parameters of the SVM to be tested + in the grid-search. + Default: {'cost': [2**p for p in range(-5, 16, 2)], 'gamma': [2**p for p in range(-15, 4, 2)]}. + + ``mean_std_norm_flag`` : :py:class:`bool` + Perform mean-std normalization of data if set to True. Default: False. + + ``frame_level_scores_flag`` : :py:class:`bool` + Return scores for each frame individually if True. Otherwise, return a + single score per video. Should be used only when features are in Frame Containers. Default: False. + + ``save_debug_data_flag`` : :py:class:`bool` + Save the data, which might be usefull for debugging if ``True``. + Default: ``True``. + + ``reduced_train_data_flag`` : :py:class:`bool` + Reduce the amount of final training samples if set to ``True``. + Default: ``False``. + + ``n_train_samples`` : :py:class:`int` + Number of uniformly selected feature vectors per class defining the + sizes of sub-sets used in the final traing of the SVM. + Default: 50000. 
def split_data_to_train_cv(self, features):
    """
    Split the input array of features into a train and a cross-validation
    subset, which can be used to tune the hyper-parameters of the SVM.
    The first half of the samples becomes the train set, the remaining
    samples become the cross-validation set. When the number of samples is
    odd, the cross-validation set receives the extra sample.

    **Parameters:**

    ``features`` : 2D :py:class:`numpy.ndarray`
        Input array with feature vectors. The rows are samples, columns are features.

    **Returns:**

    ``features_train`` : 2D :py:class:`numpy.ndarray`
        Selected subset of train features.

    ``features_cv`` : 2D :py:class:`numpy.ndarray`
        Selected subset of cross-validation features.
    """

    # Integer floor division replaces ``np.int(n / 2)``: the ``np.int`` alias
    # no longer exists in recent NumPy releases.
    half_samples_num = features.shape[0] // 2

    features_train = features[:half_samples_num, :]
    # The former upper bound ``2 * half_samples_num + 1`` always reached the
    # end of the array, so an open-ended slice selects the same rows.
    features_cv = features[half_samples_num:, :]

    return features_train, features_cv
def mean_std_normalize(self, features, features_mean=None, features_std=None):
    """
    Mean-std normalize the features of the input 2D array.
    The rows are samples, the columns are features. If ``features_mean``
    and/or ``features_std`` are provided, these vectors are used for the
    normalization. Whichever of the two is missing is computed from
    ``features``. The input array is not modified.

    **Parameters:**

    ``features`` : 2D :py:class:`numpy.ndarray`
        Array of features to be normalized.

    ``features_mean`` : 1D :py:class:`numpy.ndarray`
        Mean of the features. Default: None.

    ``features_std`` : 1D :py:class:`numpy.ndarray`
        Standard deviation of the features. Default: None.

    **Returns:**

    ``features_norm`` : 2D :py:class:`numpy.ndarray`
        Normalized array of features.

    ``features_mean`` : 1D :py:class:`numpy.ndarray`
        Mean of the features.

    ``features_std`` : 1D :py:class:`numpy.ndarray`
        Standard deviation actually used as the divisor. Entries equal to
        zero (constant features) are replaced by 1.0, so that such features
        are mapped to 0 instead of NaN/inf.
    """

    features = np.asarray(features)

    # Each statistic is resolved independently, so a caller may supply only
    # one of them.
    if features_mean is None:
        features_mean = np.mean(features, axis=0)

    if features_std is None:
        features_std = np.std(features, axis=0)

    # Guard against division by zero for constant features.
    features_std = np.where(np.asarray(features_std) == 0, 1.0, features_std)

    # Broadcasting normalizes all rows at once and returns a new array.
    features_norm = (features - features_mean) / features_std

    return features_norm, features_mean, features_std
def train_svm(self,
              training_features,
              n_samples=10000,
              machine_type='C_SVC',
              kernel_type='RBF',
              trainer_grid_search_params=None,
              mean_std_norm_flag=False,
              projector_file="",
              save_debug_data_flag=True,
              reduced_train_data_flag=False,
              n_train_samples=50000):
    """
    First, this function tunes the hyper-parameters of the SVM classifier using
    grid search on sub-sets of the training data. Train and cross-validation
    subsets for both classes are formed from the input ``training_features``.

    Once the best parameters (highest precision on the cross-validation
    subset) are determined, the SVM is trained on the whole training set, or
    on a reduced set if ``reduced_train_data_flag`` is ``True``. The resulting
    machine is returned by the function.

    **Parameters:**

    ``training_features`` : [[FrameContainer], [FrameContainer]]
        A list containing two elements: [0] - a list of Frame Containers with
        feature vectors for the real class; [1] - a list of Frame Containers with
        feature vectors for the attack class.

    ``n_samples`` : :py:class:`int`
        Number of uniformly selected feature vectors per class defining the
        sizes of sub-sets used in the hyper-parameter grid search.

    ``machine_type`` : :py:class:`str`
        A type of the SVM machine. Please check ``bob.learn.libsvm`` for
        more details. ``'ONE_CLASS'`` selects the one-class code path, in
        which only the real class is used for training.

    ``kernel_type`` : :py:class:`str`
        A type of kernel for the SVM machine. Please check ``bob.learn.libsvm``
        for more details.

    ``trainer_grid_search_params`` : :py:class:`dict`
        Dictionary containing the hyper-parameters of the SVM to be tested
        in the grid-search. If ``None``, the default grid is used:
        {'cost': [2**p for p in range(-5, 16, 2)], 'gamma': [2**p for p in range(-15, 4, 2)]}.

    ``mean_std_norm_flag`` : :py:class:`bool`
        Perform mean-std normalization of data if set to True. Default: False.

    ``projector_file`` : :py:class:`str`
        The name of the file to save the trained projector to. Only the path
        of this file is used in this function. The file debug_data.hdf5 will
        be saved in this path. This file contains information, which might be
        useful for debugging.

    ``save_debug_data_flag`` : :py:class:`bool`
        Save the data, which might be useful for debugging if ``True``.
        Default: ``True``.

    ``reduced_train_data_flag`` : :py:class:`bool`
        Reduce the amount of final training samples if set to ``True``.
        Default: ``False``.

    ``n_train_samples`` : :py:class:`int`
        Number of uniformly selected feature vectors per class defining the
        sizes of sub-sets used in the final training of the SVM.
        Default: 50000.

    **Returns:**

    ``machine`` : object
        A trained SVM machine.
    """

    if trainer_grid_search_params is None:
        # Built on every call, so no mutable default object is shared
        # between calls.
        trainer_grid_search_params = {
            'cost': [2**p for p in range(-5, 16, 2)],
            'gamma': [2**p for p in range(-15, 4, 2)]
        }

    def make_trainer(params):
        # Fresh trainer configured with one hyper-parameter combination.
        new_trainer = bob.learn.libsvm.Trainer(
            machine_type=machine_type,
            kernel_type=kernel_type,
            probability=True)
        for key, value in params.items():
            setattr(new_trainer, key, value)
        return new_trainer

    one_class_flag = (machine_type == 'ONE_CLASS')

    # ------------------------------------------------------------------
    # Stage 1: grid search of the hyper-parameters on data sub-sets.
    real_train, real_cv, attack_train, attack_cv = \
        self.prepare_data_for_hyper_param_grid_search(training_features,
                                                      n_samples)

    if mean_std_norm_flag:
        real_train, real_cv, attack_train, attack_cv = self.norm_train_cv_data(
            real_train, real_cv, attack_train, attack_cv, one_class_flag)

    precisions_cv = []  # precision on the cross-validation set
    precisions_train = []  # precision on the train set

    # list containing all combinations of params
    trainer_grid_search_params_list = combinations(trainer_grid_search_params)

    for trainer_grid_search_param in trainer_grid_search_params_list:

        trainer = make_trainer(trainer_grid_search_param)

        if one_class_flag:
            data = [np.copy(real_train)]  # only real class is used
        else:
            data = [np.copy(real_train), np.copy(attack_train)]

        machine = trainer.train(data)

        precisions_cv.append(
            self.comp_prediction_precision(machine, np.copy(real_cv),
                                           np.copy(attack_cv)))

        precisions_train.append(
            self.comp_prediction_precision(machine, np.copy(real_train),
                                           np.copy(attack_train)))

        # release the per-iteration objects before the next combination
        del data
        del machine
        del trainer

    # best SVM parameters according to the CV set
    selected_params = trainer_grid_search_params_list[np.argmax(
        precisions_cv)]

    trainer = make_trainer(selected_params)

    # Save the data, which is useful for debugging.
    if save_debug_data_flag:

        debug_file = os.path.join(
            os.path.split(projector_file)[0], "debug_data.hdf5")

        debug_dict = {
            'precisions_train': precisions_train,
            'precisions_cv': precisions_cv
        }
        debug_dict.update(selected_params)

        f = bob.io.base.HDF5File(debug_file, 'w')
        for key, value in debug_dict.items():
            f.set(key, value)
        del f  # dropping the last reference releases the file handle

    # ------------------------------------------------------------------
    # Stage 2: final training with the selected parameters.
    # training_features[0] - training features for the REAL class.
    real = convert_and_prepare_features(training_features[0])
    # training_features[1] - training features for the ATTACK class.
    attack = convert_and_prepare_features(training_features[1])

    features_mean = 0.0
    features_std = 1.0

    if mean_std_norm_flag:

        if not one_class_flag:  # two-class SVM case

            n_real = real.shape[0]
            features_norm, features_mean, features_std = self.mean_std_normalize(
                np.vstack([real, attack]))
            real = features_norm[:n_real, :]
            attack = features_norm[n_real:, :]

        else:  # one-class SVM case

            # use only the real class to compute the normalizers
            real, features_mean, features_std = self.mean_std_normalize(real)
            # ``mean_std_normalize`` returns a 3-tuple; only the normalized
            # array must be kept here.
            attack, _, _ = self.mean_std_normalize(attack, features_mean,
                                                   features_std)

    if reduced_train_data_flag:
        # quasi-uniformly select subsets of features:
        real = select_quasi_uniform_data_subset(real, n_train_samples)
        attack = select_quasi_uniform_data_subset(attack, n_train_samples)

    if one_class_flag:
        data = [np.copy(real)]  # only real class used for training
    else:
        data = [np.copy(real), np.copy(attack)]

    # free the memory of unnecessary data
    del real
    del attack

    machine = trainer.train(data)

    if mean_std_norm_flag:
        # store the normalizers of the train data in the machine
        machine.input_subtract = features_mean
        machine.input_divide = features_std

    del data

    return machine
def load_projector(self, projector_file):
    """
    Load the pretrained projector/SVM from file to perform a feature projection.
    This function usually is useful in combination with the
    :py:meth:`train_projector` function.

    Please register `performs_projection = True` in the constructor to
    enable this function.

    The loaded machine is stored in ``self.machine``.

    **Parameters:**

    ``projector_file`` : :py:class:`str`
        The file to read the projector from.
    """

    # Read-only access is sufficient for loading. Append mode would require
    # write permission and would create an empty file if the path is missing.
    f = bob.io.base.HDF5File(projector_file, 'r')

    self.machine = bob.learn.libsvm.Machine(f)

    del f  # dropping the last reference releases the file handle
def score(self, toscore):
    """
    Returns a probability of a sample being a real class.

    **Parameters:**

    ``toscore`` : 1D or 2D :py:class:`numpy.ndarray`
        2D in the case of two-class SVM.
        An array containing class probabilities for each frame.
        First column contains probabilities for each frame being a real class.
        Second column contains probabilities for each frame being an attack class.
        1D in the case of one-class SVM.
        Vector with scores for each frame defining belonging to the real class.

    **Returns:**

    ``score`` : :py:class:`float` or a 1D :py:class:`numpy.ndarray`
        If ``frame_level_scores_flag = False`` a single score is returned.
        One score per video.
        Score is a probability of a sample being a real class.
        If ``frame_level_scores_flag = True`` a 1D array of scores is returned.
        One score per frame.
        Score is a probability of a sample being a real class.
    """

    toscore = np.asarray(toscore)

    # A 1D input already is the vector of per-frame scores. For a 2D input
    # the first column holds the scores of the real class.
    frame_scores = toscore if toscore.ndim == 1 else toscore[:, 0]

    if self.frame_level_scores_flag:
        return frame_scores  # one score per frame

    return np.mean(frame_scores)  # a single score per sample
+ """ + + scores = self.score( + toscore) # returns float score or 1D array of scores + + if isinstance(scores, np.float): # if a single score + + list_of_scores = [scores] + + else: + + list_of_scores = list(scores) + + return list_of_scores diff --git a/bob/pad/base/test/test_algorithms.py b/bob/pad/base/test/test_algorithms.py new file mode 100644 index 0000000..784a1aa --- /dev/null +++ b/bob/pad/base/test/test_algorithms.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Manuel Guenther <Manuel.Guenther@idiap.ch> +# @author: Pavel Korshunov <pavel.korshunov@idiap.ch> +# @date: Tue May 17 12:09:22 CET 2016 +# + +import numpy as np +from bob.io.base.test_utils import datafile +from bob.io.base import load + +import bob.io.image # for image loading functionality + +import bob.bio.video +from bob.pad.base.algorithm import SVM + +import random + +from bob.pad.base.utils import convert_array_to_list_of_frame_cont, convert_frame_cont_to_array + +def test_video_svm_pad_algorithm(): + """ + Test the VideoSvmPadAlgorithm algorithm. + """ + + random.seed(7) + + N = 20000 + mu = 1 + sigma = 1 + real_array = np.transpose( + np.vstack([[random.gauss(mu, sigma) for _ in range(N)], + [random.gauss(mu, sigma) for _ in range(N)]])) + + mu = 5 + sigma = 1 + attack_array = np.transpose( + np.vstack([[random.gauss(mu, sigma) for _ in range(N)], + [random.gauss(mu, sigma) for _ in range(N)]])) + + real = convert_array_to_list_of_frame_cont(real_array) + attack = convert_array_to_list_of_frame_cont(attack_array) + + training_features = [real, attack] + + MACHINE_TYPE = 'C_SVC' + KERNEL_TYPE = 'RBF' + N_SAMPLES = 1000 + TRAINER_GRID_SEARCH_PARAMS = {'cost': [1], 'gamma': [0.5, 1]} + MEAN_STD_NORM_FLAG = True # enable mean-std normalization + FRAME_LEVEL_SCORES_FLAG = True # one score per frame(!) 
in this case + + algorithm = SVM( + machine_type=MACHINE_TYPE, + kernel_type=KERNEL_TYPE, + n_samples=N_SAMPLES, + trainer_grid_search_params=TRAINER_GRID_SEARCH_PARAMS, + mean_std_norm_flag=MEAN_STD_NORM_FLAG, + frame_level_scores_flag=FRAME_LEVEL_SCORES_FLAG) + + machine = algorithm.train_svm( + training_features=training_features, + n_samples=algorithm.n_samples, + machine_type=algorithm.machine_type, + kernel_type=algorithm.kernel_type, + trainer_grid_search_params=algorithm.trainer_grid_search_params, + mean_std_norm_flag=algorithm.mean_std_norm_flag, + projector_file="", + save_debug_data_flag=False) + + assert machine.n_support_vectors == [148, 150] + assert machine.gamma == 0.5 + + real_sample = convert_frame_cont_to_array(real[0]) + + prob = machine.predict_class_and_probabilities(real_sample)[1] + + assert prob[0, 0] > prob[0, 1] + + precision = algorithm.comp_prediction_precision(machine, real_array, + attack_array) + + assert precision > 0.99 \ No newline at end of file diff --git a/bob/pad/base/utils/__init__.py b/bob/pad/base/utils/__init__.py new file mode 100644 index 0000000..cb8d241 --- /dev/null +++ b/bob/pad/base/utils/__init__.py @@ -0,0 +1,3 @@ +from .helper_functions import * + + diff --git a/bob/pad/base/utils/helper_functions.py b/bob/pad/base/utils/helper_functions.py new file mode 100644 index 0000000..f955904 --- /dev/null +++ b/bob/pad/base/utils/helper_functions.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +import numpy as np + + +def convert_frame_cont_to_array(frame_container): + """ + This function converts a single Frame Container into an array of features. + The rows are samples, the columns are features. + + **Parameters:** + + ``frame_container`` : object + A Frame Container conteining the features of an individual, + see ``bob.bio.video.utils.FrameContainer``. + + **Returns:** + + ``features_array`` : 2D :py:class:`numpy.ndarray` + An array containing features for all frames. 
def convert_and_prepare_features(features):
    """
    This function converts a list of feature arrays or a list of frame
    containers into a 2D array of features. If the input is a list of frame
    containers, features from different frame containers (individuals) are
    concatenated into the same list. This list is then converted to an array.
    The rows are samples, the columns are features.

    **Parameters:**

    ``features`` : [2D :py:class:`numpy.ndarray`] or [FrameContainer]
        A list of 2D feature arrays or a list of Frame Containers, see
        ``bob.bio.video.utils.FrameContainer``. Each frame Container contains
        feature vectors for the particular individual/person. The type of the
        first element decides how the whole list is handled.

    **Returns:**

    ``features_array`` : 2D :py:class:`numpy.ndarray`
        An array containing features for all samples and frames.
    """

    first_item = features[0]

    # Plain arrays can never be Frame Containers, so they are stacked
    # directly, without importing ``bob.bio.video``.
    if isinstance(first_item, np.ndarray):
        return np.vstack(features)

    # Imported locally: this module does not import FrameContainer at the
    # top of the file.
    from bob.bio.video.utils import FrameContainer

    if isinstance(first_item, FrameContainer):
        # Module-level helper; there is no ``self`` in a free function.
        return convert_list_of_frame_cont_to_array(features)

    return np.vstack(features)
def combinations(input_dict):
    """
    Obtain all possible key-value combinations in the input dictionary
    containing list values. The keys are processed in sorted order, and the
    values of the last key vary fastest.

    **Parameters:**

    ``input_dict`` : :py:class:`dict`
        Input dictionary with list values.

    **Returns:**

    ``combinations`` : [:py:class:`dict`]
        A list of dictionaries containing the combinations. An empty input
        dictionary yields a list with a single empty dictionary.
    """

    # Imported locally: this module does not import itertools at the top of
    # the file.
    import itertools

    var_names = sorted(input_dict)

    value_lists = [input_dict[var_name] for var_name in var_names]

    return [
        dict(zip(var_names, values))
        for values in itertools.product(*value_lists)
    ]
def select_quasi_uniform_data_subset(features, n_samples):
    """
    Select quasi uniformly N samples/feature vectors from the input array of samples.
    The rows in the input array are samples. The columns are features.
    Use this function if n_samples is close to the number of samples.

    **Parameters:**

    ``features`` : 2D :py:class:`numpy.ndarray`
        Input array with feature vectors. The rows are samples, columns are features.

    ``n_samples`` : :py:class:`int`
        The number of samples to be selected uniformly from the input array of features.

    **Returns:**

    ``features_subset`` : 2D :py:class:`numpy.ndarray`
        Selected subset of features. The input array itself is returned
        if it contains no more than ``n_samples`` rows.
    """

    n_available = features.shape[0]

    if n_available <= n_samples:
        return features

    # Fractional step: the selected rows are spread over the whole array even
    # when ``n_samples`` does not divide the number of rows.
    uniform_step = (1.0 * n_available) / n_samples

    # The builtin ``int`` truncates exactly like the former ``np.int`` alias,
    # which no longer exists in recent NumPy releases.
    idx = [int(uniform_step * item) for item in range(n_samples)]

    return features[idx, :]
+ """ + + frame_container_list = [] + + for idx, vec in enumerate(data): + + frame_container = bob.bio.video.FrameContainer( + ) # initialize the FrameContainer + + frame_container.add(0, vec) + + frame_container_list.append( + frame_container) # add current frame to FrameContainer + + return frame_container_list + -- GitLab