diff --git a/README.rst b/README.rst
index 16ad8649340b5f35eb0526b2cd65f9a9e04fd281..5b6c578874ce4a0db243906fb5ad6e969d5eaa31 100644
--- a/README.rst
+++ b/README.rst
@@ -20,8 +20,7 @@
 ====================================================
 
 This package is part of the signal-processing and machine learning toolbox
-Bob_. It implementes several different algorithms that are useful to detect
-presentation attacks.
+Bob_. It implements tools for spoofing or presentation attack detection in face biometrics.
 
 
 Installation
diff --git a/bob/pad/face/algorithm/VideoSvmPadAlgorithm.py b/bob/pad/face/algorithm/VideoSvmPadAlgorithm.py
index 9cbaeb7d79365420c4cd7646afe5d50f0cd03237..d84b07529f7faf4dffbad287b20137794e275172 100644
--- a/bob/pad/face/algorithm/VideoSvmPadAlgorithm.py
+++ b/bob/pad/face/algorithm/VideoSvmPadAlgorithm.py
@@ -60,6 +60,19 @@ class VideoSvmPadAlgorithm(Algorithm):
     ``frame_level_scores_flag`` : :py:class:`bool`
         Return scores for each frame individually if True. Otherwise, return a
         single score per video. Default: False.
+
+    ``save_debug_data_flag`` : :py:class:`bool`
+        Save the data, which might be useful for debugging, if ``True``.
+        Default: ``True``.
+
+    ``reduced_train_data_flag`` : :py:class:`bool`
+        Reduce the amount of final training samples if set to ``True``.
+        Default: ``False``.
+
+    ``n_train_samples`` : :py:class:`int`
+        Number of uniformly selected feature vectors per class defining the
+        sizes of the sub-sets used in the final training of the SVM.
+        Default: 50000.
     """
 
     def __init__(self,
@@ -68,7 +81,10 @@ class VideoSvmPadAlgorithm(Algorithm):
                  n_samples = 10000,
                  trainer_grid_search_params = { 'cost': [2**p for p in range(-5, 16, 2)], 'gamma': [2**p for p in range(-15, 4, 2)]},
                  mean_std_norm_flag = False,
-                 frame_level_scores_flag = False):
+                 frame_level_scores_flag = False,
+                 save_debug_data_flag = True,
+                 reduced_train_data_flag = False,
+                 n_train_samples = 50000):
 
         Algorithm.__init__(self,
@@ -78,6 +94,9 @@ class VideoSvmPadAlgorithm(Algorithm):
                            trainer_grid_search_params = trainer_grid_search_params,
                            mean_std_norm_flag = mean_std_norm_flag,
                            frame_level_scores_flag = frame_level_scores_flag,
+                           save_debug_data_flag = save_debug_data_flag,
+                           reduced_train_data_flag = reduced_train_data_flag,
+                           n_train_samples = n_train_samples,
                            performs_projection=True,
                            requires_projector_training=True)
 
@@ -87,6 +106,9 @@ class VideoSvmPadAlgorithm(Algorithm):
         self.trainer_grid_search_params = trainer_grid_search_params
         self.mean_std_norm_flag = mean_std_norm_flag
         self.frame_level_scores_flag = frame_level_scores_flag
+        self.save_debug_data_flag = save_debug_data_flag
+        self.reduced_train_data_flag = reduced_train_data_flag
+        self.n_train_samples = n_train_samples
 
         self.machine = None
 
@@ -217,6 +239,44 @@ class VideoSvmPadAlgorithm(Algorithm):
 
         return features_subset
 
+    #==========================================================================
+    def select_quasi_uniform_data_subset(self, features, n_samples):
+        """
+        Select quasi-uniformly N samples/feature vectors from the input array of samples.
+        The rows in the input array are samples. The columns are features.
+        Use this function if n_samples is close to the number of samples.
+
+        **Parameters:**
+
+        ``features`` : 2D :py:class:`numpy.ndarray`
+            Input array with feature vectors. The rows are samples, columns are features.
+
+        ``n_samples`` : :py:class:`int`
+            The number of samples to be selected uniformly from the input array of features.
+
+        **Returns:**
+
+        ``features_subset`` : 2D :py:class:`numpy.ndarray`
+            Selected subset of features.
+        """
+
+        if features.shape[0] <= n_samples:
+
+            features_subset = features
+
+        else:
+
+            uniform_step = (1.0 * features.shape[0]) / n_samples
+
+            element_num_list = range(0, n_samples)
+
+            idx = [np.int(uniform_step * item) for item in element_num_list]
+
+            features_subset = features[idx, :]
+
+        return features_subset
+
+
     #==========================================================================
     def split_data_to_train_cv(self, features):
         """
@@ -447,7 +507,9 @@ class VideoSvmPadAlgorithm(Algorithm):
                   trainer_grid_search_params = { 'cost': [2**p for p in range(-5, 16, 2)], 'gamma': [2**p for p in range(-15, 4, 2)]},
                   mean_std_norm_flag = False,
                   projector_file = "",
-                  save_debug_data_flag = True):
+                  save_debug_data_flag = True,
+                  reduced_train_data_flag = False,
+                  n_train_samples = 50000):
         """
         First, this function tunes the hyper-parameters of the SVM classifier using
         grid search on the sub-sets of training data. Train and cross-validation
@@ -492,6 +554,15 @@ class VideoSvmPadAlgorithm(Algorithm):
             Save the data, which might be useful for debugging, if ``True``.
             Default: ``True``.
 
+        ``reduced_train_data_flag`` : :py:class:`bool`
+            Reduce the amount of final training samples if set to ``True``.
+            Default: ``False``.
+
+        ``n_train_samples`` : :py:class:`int`
+            Number of uniformly selected feature vectors per class defining the
+            sizes of the sub-sets used in the final training of the SVM.
+            Default: 50000.
+
         **Returns:**
 
         ``machine`` : object
@@ -574,6 +645,12 @@ class VideoSvmPadAlgorithm(Algorithm):
         real = features_norm[0:real.shape[0], :] # The array is now normalized
         attack = features_norm[real.shape[0]:, :] # The array is now normalized
 
+        if reduced_train_data_flag:
+
+            # uniformly select subsets of features:
+            real = self.select_quasi_uniform_data_subset(real, n_train_samples)
+            attack = self.select_quasi_uniform_data_subset(attack, n_train_samples)
+
         data = [np.copy(real), np.copy(attack)] # data for final training
 
         machine = trainer.train(data) # train the machine
@@ -612,7 +689,10 @@ class VideoSvmPadAlgorithm(Algorithm):
                                  kernel_type = self.kernel_type,
                                  trainer_grid_search_params = self.trainer_grid_search_params,
                                  mean_std_norm_flag = self.mean_std_norm_flag,
-                                 projector_file = projector_file)
+                                 projector_file = projector_file,
+                                 save_debug_data_flag = self.save_debug_data_flag,
+                                 reduced_train_data_flag = self.reduced_train_data_flag,
+                                 n_train_samples = self.n_train_samples)
 
         f = bob.io.base.HDF5File(projector_file, 'w') # open hdf5 file to save to
 
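For quick review, here is a minimal standalone sketch of the quasi-uniform selection that ``select_quasi_uniform_data_subset`` above implements; the toy array and sizes are made up for illustration::

    import numpy as np

    features = np.arange(20).reshape(10, 2)  # 10 samples (rows), 2 features (columns)
    n_samples = 4

    if features.shape[0] <= n_samples:
        features_subset = features
    else:
        # stride through the rows; truncating the fractional step keeps the
        # selected indices unique even when n_samples is close to the row count
        uniform_step = (1.0 * features.shape[0]) / n_samples      # 2.5 here
        idx = [int(uniform_step * k) for k in range(n_samples)]   # [0, 2, 5, 7]
        features_subset = features[idx, :]

Note the gaps between the kept indices are not all equal (2, 3, 2), which is why the method is called *quasi*-uniform.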
diff --git a/bob/pad/face/config/database/aggregated_db.py b/bob/pad/face/config/database/aggregated_db.py
new file mode 100644
index 0000000000000000000000000000000000000000..e12abfcd7cc21f9747caebcf9aa6a5e674170b17
--- /dev/null
+++ b/bob/pad/face/config/database/aggregated_db.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+"""Aggregated Db is a database for face PAD experiments.
+This database aggregates the data from 3 publicly available data-sets:
+`REPLAYATTACK`_, `REPLAY-MOBILE`_ and `MSU MFSD`_.
+You can download the data for the above databases by following the corresponding
+links.
+
+The reference citation for `REPLAYATTACK`_ is [CAM12]_.
+The reference citation for `REPLAY-MOBILE`_ is [CBVM16]_.
+The reference citation for `MSU MFSD`_ is [WHJ15]_.
+
+.. include:: links.rst
+"""
+
+from bob.pad.face.database import AggregatedDbPadDatabase
+
+# Directory where the data files are stored.
+# This directory is given in the .bob_bio_databases.txt file located in your home directory.
+original_directory = "[YOUR_AGGREGATED_DB_DIRECTORIES]"
+"""Value of ``~/.bob_bio_databases.txt`` for this database"""
+
+original_extension = ".mov" # extension of the data files
+
+database = AggregatedDbPadDatabase(
+    protocol='grandtest',
+    original_directory=original_directory,
+    original_extension=original_extension,
+    training_depends_on_protocol=True,
+)
+"""The :py:class:`bob.pad.base.database.PadDatabase` derivative with Aggregated Db
+database settings.
+
+.. warning::
+
+   This class only provides a programmatic interface to load data in an orderly
+   manner, respecting usage protocols. It does **not** contain the raw
+   data files. You should procure those yourself.
+
+Notice that ``original_directory`` is set to ``[YOUR_AGGREGATED_DB_DIRECTORIES]``.
+You must create the ``${HOME}/.bob_bio_databases.txt`` file and set this value
+to the places where you actually installed the Replay-Attack, Replay-Mobile
+and MSU MFSD databases. In particular, the paths pointing to these 3 databases
+must be separated with a space. See the following note for an example of the
+``[YOUR_AGGREGATED_DB_DIRECTORIES]`` entry in the ``${HOME}/.bob_bio_databases.txt`` file.
+
+.. note::
+
+    [YOUR_AGGREGATED_DB_DIRECTORIES] = <PATH_TO_REPLAY_ATTACK> <PATH_TO_REPLAY_MOBILE> <PATH_TO_MSU_MFSD>
+"""
\ No newline at end of file
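To make the space-separated ``[YOUR_AGGREGATED_DB_DIRECTORIES]`` entry concrete, a small sketch; the paths are hypothetical, and the positional split mirrors ``AggregatedDbPadFile.load()`` later in this patch::

    # Hypothetical entry from ${HOME}/.bob_bio_databases.txt:
    entry = "/data/replay-attack /data/replay-mobile /data/msu-mfsd"

    # The aggregated database resolves the three sub-databases by position,
    # splitting on a single space:
    replay_dir, replay_mobile_dir, msu_mfsd_dir = entry.split(" ")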
+""" + + +#======================================================================================= +# define extractor: + +from ..extractor import FrameDiffFeatures + +WINDOW_SIZE=20 +OVERLAP=0 + +extractor = FrameDiffFeatures(window_size=WINDOW_SIZE, + overlap=OVERLAP) +""" +In the feature extraction stage 5 features are extracted for all non-overlapping windows in +the Frame Difference input signals. Five features are computed for each of windows in the +facial face regions, the same is done for non-facial regions. The non-overlapping option +is controlled by ``overlap = 0``. The length of the window is defined by ``window_size`` +argument. +The features are introduced in the following paper: [AM11]_. +""" + + +#======================================================================================= +# define algorithm: + +from ..algorithm import VideoSvmPadAlgorithm + +MACHINE_TYPE = 'C_SVC' +KERNEL_TYPE = 'RBF' +N_SAMPLES = 10000 +TRAINER_GRID_SEARCH_PARAMS = {'cost': [2**P for P in range(-3, 14, 2)], 'gamma': [2**P for P in range(-15, 0, 2)]} +MEAN_STD_NORM_FLAG = True # enable mean-std normalization +FRAME_LEVEL_SCORES_FLAG = True # one score per frame(!) in this case +SAVE_DEBUG_DATA_FLAG = True # save the data, which might be useful for debugging +REDUCED_TRAIN_DATA_FLAG = True # reduce the amount of training data in the final training stage +N_TRAIN_SAMPLES = 50000 # number of training samples per class in the final SVM training stage + +algorithm = VideoSvmPadAlgorithm(machine_type = MACHINE_TYPE, + kernel_type = KERNEL_TYPE, + n_samples = N_SAMPLES, + trainer_grid_search_params = TRAINER_GRID_SEARCH_PARAMS, + mean_std_norm_flag = MEAN_STD_NORM_FLAG, + frame_level_scores_flag = FRAME_LEVEL_SCORES_FLAG, + save_debug_data_flag = SAVE_DEBUG_DATA_FLAG, + reduced_train_data_flag = REDUCED_TRAIN_DATA_FLAG, + n_train_samples = N_TRAIN_SAMPLES) +""" +The SVM algorithm with RBF kernel is used to classify the data into *real* and *attack* classes. +One score is produced for each frame of the input video, ``frame_level_scores_flag = True``. +The grid search of SVM parameters is used to select the successful settings. +The grid search is done on the subset of training data. +The size of this subset is defined by ``n_samples`` parameter. +The final training of the SVM is done on the subset of training data ``reduced_train_data_flag = True``. +The size of the subset for the final training stage is defined by the ``n_train_samples`` argument. +The data is also mean-std normalized, ``mean_std_norm_flag = True``. +""" + + + + + + + + diff --git a/bob/pad/face/config/lbp_svm_aggregated_db.py b/bob/pad/face/config/lbp_svm_aggregated_db.py new file mode 100644 index 0000000000000000000000000000000000000000..5d249a7c83ed3169319e06130096f89a1e725ca7 --- /dev/null +++ b/bob/pad/face/config/lbp_svm_aggregated_db.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- + +""" +This file contains configurations to run LBP and SVM based face PAD baseline. +The settings of the preprocessor and extractor are tuned for the Replay-attack database. +In the SVM algorithm the amount of training data is reduced speeding-up the training for +large data sets, such as Aggregated PAD database. +The idea of the algorithm is introduced in the following paper: [CAM12]_. +However some settings are different from the ones introduced in the paper. 
+""" + + +#======================================================================================= +sub_directory = 'lbp_svm_aggregated_db' +""" +Sub-directory where results will be placed. + +You may change this setting using the ``--sub-directory`` command-line option +or the attribute ``sub_directory`` in a configuration file loaded **after** +this resource. +""" + + +#======================================================================================= +# define preprocessor: + +from ..preprocessor import VideoFaceCrop + +CROPPED_IMAGE_SIZE = (64, 64) # The size of the resulting face +CROPPED_POSITIONS = {'topleft' : (0,0) , 'bottomright' : CROPPED_IMAGE_SIZE} +FIXED_POSITIONS = None +MASK_SIGMA = None # The sigma for random values areas outside image +MASK_NEIGHBORS = 5 # The number of neighbors to consider while extrapolating +MASK_SEED = None # The seed for generating random values during extrapolation +CHECK_FACE_SIZE_FLAG = True # Check the size of the face +MIN_FACE_SIZE = 50 # Minimal possible size of the face +USE_LOCAL_CROPPER_FLAG = True # Use the local face cropping class (identical to Ivana's paper) +COLOR_CHANNEL = 'gray' # Convert image to gray-scale format + +preprocessor = VideoFaceCrop(cropped_image_size = CROPPED_IMAGE_SIZE, + cropped_positions = CROPPED_POSITIONS, + fixed_positions = FIXED_POSITIONS, + mask_sigma = MASK_SIGMA, + mask_neighbors = MASK_NEIGHBORS, + mask_seed = None, + check_face_size_flag = CHECK_FACE_SIZE_FLAG, + min_face_size = MIN_FACE_SIZE, + use_local_cropper_flag = USE_LOCAL_CROPPER_FLAG, + color_channel = COLOR_CHANNEL) +""" +In the preprocessing stage the face is cropped in each frame of the input video given facial annotations. +The size of the face is normalized to ``cropped_image_size`` dimensions. The faces with the size +below ``min_face_size`` threshold are discarded. The preprocessor is similar to the one introduced in +[CAM12]_, which is defined by ``use_local_cropper_flag = True``. +""" + + +#======================================================================================= +# define extractor: + +from ..extractor import VideoLBPHistogram + +LBPTYPE='uniform' +ELBPTYPE='regular' +RAD=1 +NEIGHBORS=8 +CIRC=False +DTYPE=None + +extractor = VideoLBPHistogram(lbptype=LBPTYPE, + elbptype=ELBPTYPE, + rad=RAD, + neighbors=NEIGHBORS, + circ=CIRC, + dtype=DTYPE) +""" +In the feature extraction stage the LBP histograms are extracted from each frame of the preprocessed video. +The parameters are similar to the ones introduced in [CAM12]_. +""" + + +#======================================================================================= +# define algorithm: + +from ..algorithm import VideoSvmPadAlgorithm + +MACHINE_TYPE = 'C_SVC' +KERNEL_TYPE = 'RBF' +N_SAMPLES = 10000 +TRAINER_GRID_SEARCH_PARAMS = {'cost': [2**P for P in range(-3, 14, 2)], 'gamma': [2**P for P in range(-15, 0, 2)]} +MEAN_STD_NORM_FLAG = True # enable mean-std normalization +FRAME_LEVEL_SCORES_FLAG = True # one score per frame(!) 
+SAVE_DEBUG_DATA_FLAG = True # save the data, which might be useful for debugging
+REDUCED_TRAIN_DATA_FLAG = True # reduce the amount of training data in the final training stage
+N_TRAIN_SAMPLES = 50000 # number of training samples per class in the final SVM training stage
+
+algorithm = VideoSvmPadAlgorithm(machine_type = MACHINE_TYPE,
+                                 kernel_type = KERNEL_TYPE,
+                                 n_samples = N_SAMPLES,
+                                 trainer_grid_search_params = TRAINER_GRID_SEARCH_PARAMS,
+                                 mean_std_norm_flag = MEAN_STD_NORM_FLAG,
+                                 frame_level_scores_flag = FRAME_LEVEL_SCORES_FLAG,
+                                 save_debug_data_flag = SAVE_DEBUG_DATA_FLAG,
+                                 reduced_train_data_flag = REDUCED_TRAIN_DATA_FLAG,
+                                 n_train_samples = N_TRAIN_SAMPLES)
+"""
+The SVM algorithm with an RBF kernel is used to classify the data into *real* and *attack* classes.
+One score is produced for each frame of the input video, ``frame_level_scores_flag = True``.
+A grid search over the SVM hyper-parameters is used to select successful settings. The grid
+search is done on a subset of the training data whose size is defined by the ``n_samples``
+parameter. The final training of the SVM is also done on a subset of the training data,
+``reduced_train_data_flag = True``; the size of this subset is defined by the
+``n_train_samples`` argument. The data is also mean-std normalized,
+``mean_std_norm_flag = True``.
+"""
\ No newline at end of file
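As the ``sub_directory`` docstrings in these configuration files note, a file loaded after the resource can override its attributes; a minimal sketch, with a hypothetical file name and value::

    # my_experiment.py -- pass this on the command line *after* the resource;
    # attributes from later configuration files override earlier ones.
    sub_directory = 'lbp_svm_aggregated_db_run2'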
+""" + + +#======================================================================================= +# define preprocessor: + +from ..preprocessor import VideoFaceCrop + +CROPPED_IMAGE_SIZE = (64, 64) # The size of the resulting face +CROPPED_POSITIONS = {'topleft' : (0,0) , 'bottomright' : CROPPED_IMAGE_SIZE} +FIXED_POSITIONS = None +MASK_SIGMA = None # The sigma for random values areas outside image +MASK_NEIGHBORS = 5 # The number of neighbors to consider while extrapolating +MASK_SEED = None # The seed for generating random values during extrapolation +CHECK_FACE_SIZE_FLAG = True # Check the size of the face +MIN_FACE_SIZE = 50 +USE_LOCAL_CROPPER_FLAG = True # Use the local face cropping class (identical to Ivana's paper) +RGB_OUTPUT_FLAG = True # Return RGB cropped face using local cropper + +preprocessor = VideoFaceCrop(cropped_image_size = CROPPED_IMAGE_SIZE, + cropped_positions = CROPPED_POSITIONS, + fixed_positions = FIXED_POSITIONS, + mask_sigma = MASK_SIGMA, + mask_neighbors = MASK_NEIGHBORS, + mask_seed = None, + check_face_size_flag = CHECK_FACE_SIZE_FLAG, + min_face_size = MIN_FACE_SIZE, + use_local_cropper_flag = USE_LOCAL_CROPPER_FLAG, + rgb_output_flag = RGB_OUTPUT_FLAG) +""" +In the preprocessing stage the face is cropped in each frame of the input video given facial annotations. +The size of the face is normalized to ``cropped_image_size`` dimensions. The faces of the size +below ``min_face_size`` threshold are discarded. The preprocessor is similar to the one introduced in +[CAM12]_, which is defined by ``use_local_cropper_flag = True``. The preprocessed frame is the RGB +facial image, which is defined by ``RGB_OUTPUT_FLAG = True``. +""" + + +#======================================================================================= +# define extractor: + +from ..extractor import VideoQualityMeasure + +GALBALLY=True +MSU=True +DTYPE=None + +extractor = VideoQualityMeasure(galbally=GALBALLY, + msu=MSU, + dtype=DTYPE) +""" +In the feature extraction stage the Image Quality Measures are extracted from each frame of the preprocessed RGB video. +The features to be computed are introduced in the following papers: [WHJ15]_ and [CBVM16]_. +""" + + +#======================================================================================= +# define algorithm: + +from ..algorithm import VideoSvmPadAlgorithm + +MACHINE_TYPE = 'C_SVC' +KERNEL_TYPE = 'RBF' +N_SAMPLES = 10000 +TRAINER_GRID_SEARCH_PARAMS = {'cost': [2**P for P in range(-3, 14, 2)], 'gamma': [2**P for P in range(-15, 0, 2)]} +MEAN_STD_NORM_FLAG = True # enable mean-std normalization +FRAME_LEVEL_SCORES_FLAG = True # one score per frame(!) in this case +SAVE_DEBUG_DATA_FLAG = True # save the data, which might be useful for debugging +REDUCED_TRAIN_DATA_FLAG = True # reduce the amount of training data in the final training stage +N_TRAIN_SAMPLES = 50000 # number of training samples per class in the final SVM training stage + +algorithm = VideoSvmPadAlgorithm(machine_type = MACHINE_TYPE, + kernel_type = KERNEL_TYPE, + n_samples = N_SAMPLES, + trainer_grid_search_params = TRAINER_GRID_SEARCH_PARAMS, + mean_std_norm_flag = MEAN_STD_NORM_FLAG, + frame_level_scores_flag = FRAME_LEVEL_SCORES_FLAG, + save_debug_data_flag = SAVE_DEBUG_DATA_FLAG, + reduced_train_data_flag = REDUCED_TRAIN_DATA_FLAG, + n_train_samples = N_TRAIN_SAMPLES) +""" +The SVM algorithm with RBF kernel is used to classify the data into *real* and *attack* classes. +One score is produced for each frame of the input video, ``frame_level_scores_flag = True``. 
+A grid search over the SVM hyper-parameters is used to select successful settings. The grid
+search is done on a subset of the training data whose size is defined by the ``n_samples``
+parameter. The final training of the SVM is also done on a subset of the training data,
+``reduced_train_data_flag = True``; the size of this subset is defined by the
+``n_train_samples`` argument. The data is also mean-std normalized,
+``mean_std_norm_flag = True``.
+"""
diff --git a/bob/pad/face/database/__init__.py b/bob/pad/face/database/__init__.py
index abf2f69122cb549489bf3f043f6c66e8c160ed88..bc586be28417c221340e0bf11297282f5cb4e6d6 100644
--- a/bob/pad/face/database/__init__.py
+++ b/bob/pad/face/database/__init__.py
@@ -1,6 +1,8 @@
 from .replay import ReplayPadDatabase
 from .replay_mobile import ReplayMobilePadDatabase
 from .msu_mfsd import MsuMfsdPadDatabase
+from .aggregated_db import AggregatedDbPadDatabase
+
 
 # gets sphinx autodoc done right - don't remove it
 def __appropriate__(*args):
@@ -20,6 +22,7 @@ __appropriate__(
     ReplayPadDatabase,
     ReplayMobilePadDatabase,
     MsuMfsdPadDatabase,
+    AggregatedDbPadDatabase,
 )
 
 __all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/pad/face/database/aggregated_db.py b/bob/pad/face/database/aggregated_db.py
new file mode 100644
index 0000000000000000000000000000000000000000..353c2d681d6c0f114dd8d1d2a66af383ae2e33e7
--- /dev/null
+++ b/bob/pad/face/database/aggregated_db.py
@@ -0,0 +1,359 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+
+#==============================================================================
+from bob.pad.base.database import PadFile # Used in the AggregatedDbPadFile class
+
+from bob.pad.base.database import PadDatabase
+
+# Import the HLDI for the databases to aggregate:
+from bob.pad.face.database import replay as replay_hldi
+
+from bob.pad.face.database import replay_mobile as replay_mobile_hldi
+
+from bob.pad.face.database import msu_mfsd as msu_mfsd_hldi
+
+import numpy as np
+
+#==============================================================================
+class AggregatedDbPadFile(PadFile):
+    """
+    A high level implementation of the File class for the Aggregated Database
+    uniting 3 databases: REPLAY-ATTACK, REPLAY-MOBILE and MSU MFSD.
+    """
+
+    def __init__(self, f):
+        """
+        **Parameters:**
+
+        ``f`` : :py:class:`object`
+            An instance of the File class defined in the low level db interface
+            of the Replay-Attack, Replay-Mobile or MSU MFSD database, respectively
+            in the bob.db.replay.models.py, bob.db.replaymobile.models.py or
+            bob.db.msu_mfsd_mod.models.py file.
+        """
+
+        self.f = f
+        # This f is actually an instance of the File class that is defined in
+        # bob.db.<database_name>.models, and the PadFile class here needs
+        # client_id, path, attack_type and file_id for initialization. We have to
+        # convert the information here and provide it to PadFile. attack_type is a
+        # little tricky to get here. Based on the documentation of PadFile:
+        # in case of spoofed data, this parameter should indicate what kind of
+        # spoofed attack it is. The default None value is interpreted as the
+        # PadFile being a genuine or real sample.
+        if f.is_real():
+            attack_type = None
+        else:
+            attack_type = 'attack'
+        # attack_type is a string and I decided to make it like this for this
+        # particular database. You can do whatever you want for your own database.
+
+        file_path = self.encode_file_path(f)
+
+        file_id = self.encode_file_id(f)
+
+        super(AggregatedDbPadFile, self).__init__(client_id = f.client_id, path = file_path,
+                                                  attack_type = attack_type, file_id = file_id)
+
+
+    #==========================================================================
+    def encode_file_id(self, f, n = 2000):
+        """
+        Return a modified version of ``f.id``, ensuring uniqueness of the ids
+        across all databases.
+
+        **Parameters:**
+
+        ``f`` : :py:class:`object`
+            An instance of the File class defined in the low level db interface
+            of the Replay-Attack, Replay-Mobile or MSU MFSD database, respectively
+            in the bob.db.replay.models.py, bob.db.replaymobile.models.py or
+            bob.db.msu_mfsd_mod.models.py file.
+
+        ``n`` : :py:class:`int`
+            An offset to be added to the file id for the different databases, defined
+            as follows: offset = k*n, where k is the database number,
+            k = 0, 1, 2 in our case. Default: 2000.
+
+        **Returns:**
+
+        ``file_id`` : :py:class:`int`
+            A modified version of the file id, which is now unique across
+            all databases.
+        """
+
+        import bob.db.replay
+        import bob.db.replaymobile
+        import bob.db.msu_mfsd_mod
+
+        if isinstance(f, bob.db.replay.models.File): # check if instance of File class of LLDI of Replay-Attack
+
+            file_id = f.id
+
+        if isinstance(f, bob.db.replaymobile.models.File): # check if instance of File class of LLDI of Replay-Mobile
+
+            file_id = np.int(f.id + n)
+
+        if isinstance(f, bob.db.msu_mfsd_mod.models.File): # check if instance of File class of LLDI of MSU MFSD
+
+            file_id = np.int(f.id + 2*n)
+
+        return file_id
+
+
+    #==========================================================================
+    def encode_file_path(self, f):
+        """
+        Append the name of the database to the end of the file path, separated
+        with "_".
+
+        **Parameters:**
+
+        ``f`` : :py:class:`object`
+            An instance of the File class defined in the low level db interface
+            of the Replay-Attack, Replay-Mobile or MSU MFSD database, respectively
+            in the bob.db.replay.models.py, bob.db.replaymobile.models.py or
+            bob.db.msu_mfsd_mod.models.py file.
+
+        **Returns:**
+
+        ``file_path`` : :py:class:`str`
+            Modified path to the file, with the database name appended to the end,
+            separated with "_".
+        """
+
+        import bob.db.replay
+        import bob.db.replaymobile
+        import bob.db.msu_mfsd_mod
+
+        if isinstance(f, bob.db.replay.models.File): # check if instance of File class of LLDI of Replay-Attack
+
+            file_path = '_'.join([f.path, 'replay'])
+
+        if isinstance(f, bob.db.replaymobile.models.File): # check if instance of File class of LLDI of Replay-Mobile
+
+            file_path = '_'.join([f.path, 'replaymobile'])
+
+        if isinstance(f, bob.db.msu_mfsd_mod.models.File): # check if instance of File class of LLDI of MSU MFSD
+
+            file_path = '_'.join([f.path, 'msu_mfsd_mod'])
+
+        return file_path
+
+
+    #==========================================================================
+    def load(self, directory=None, extension='.mov'):
+        """
+        Overridden version of the load method defined in ``PadFile``.
+
+        **Parameters:**
+
+        ``directory`` : :py:class:`str`
+            String containing the paths to all databases used in this aggregated
+            database. The paths are separated with a space.
+
+        ``extension`` : :py:class:`str`
+            Extension of the video files in the REPLAY-ATTACK and REPLAY-MOBILE
+            databases. The extension of files in MSU MFSD is not taken into account
+            in the HighLevel DB Interface of MSU MFSD. Default: '.mov'.
+
+        **Returns:**
+
+        ``video_data`` : FrameContainer
+            Video data stored in the FrameContainer, see ``bob.bio.video.utils.FrameContainer``
+            for further details.
+        """
+
+        import bob.db.replay
+        import bob.db.replaymobile
+        import bob.db.msu_mfsd_mod
+
+        directories = directory.split(" ")
+
+        if isinstance(self.f, bob.db.replay.models.File): # check if instance of File class of LLDI of Replay-Attack
+
+            db_pad_file = replay_hldi.ReplayPadFile(self.f) # replay_hldi is HLDI of Replay-Attack
+
+            directory = directories[0]
+
+        if isinstance(self.f, bob.db.replaymobile.models.File): # check if instance of File class of LLDI of Replay-Mobile
+
+            db_pad_file = replay_mobile_hldi.ReplayMobilePadFile(self.f) # replay_mobile_hldi is HLDI of Replay-Mobile
+
+            directory = directories[1]
+
+        if isinstance(self.f, bob.db.msu_mfsd_mod.models.File): # check if instance of File class of LLDI of MSU MFSD
+
+            db_pad_file = msu_mfsd_hldi.MsuMfsdPadFile(self.f) # msu_mfsd_hldi is HLDI of MSU MFSD
+
+            directory = directories[2]
+
+        video_data = db_pad_file.load(directory = directory, extension = extension)
+
+        return video_data # video data
+
+
+#==============================================================================
+class AggregatedDbPadDatabase(PadDatabase):
+    """
+    A high level implementation of the Database class for the Aggregated Database
+    uniting 3 databases: REPLAY-ATTACK, REPLAY-MOBILE and MSU MFSD.
+    """
+
+    def __init__(
+        self,
+        protocol='grandtest', # grandtest is the default protocol for this database
+        original_directory=None,
+        original_extension=None,
+        **kwargs):
+        """
+        **Parameters:**
+
+        ``protocol`` : :py:class:`str` or ``None``
+            The name of the protocol that defines the default experimental setup
+            for this database. Default: 'grandtest'.
+
+        ``original_directory`` : :py:class:`str`
+            String containing the paths to all databases used in this aggregated
+            database. The paths are separated with a space. Default: None.
+
+        ``original_extension`` : :py:class:`str`
+            Extension of the video files in the REPLAY-ATTACK and REPLAY-MOBILE
+            databases. The extension of files in MSU MFSD is not taken into account
+            in the HighLevel DB Interface of MSU MFSD. Default: None.
+
+        ``kwargs``
+            The arguments of the :py:class:`bob.bio.base.database.BioDatabase` base class constructor.
+        """
+
+        # Import the LLDI for all databases:
+        import bob.db.replay
+        import bob.db.replaymobile
+        import bob.db.msu_mfsd_mod
+
+        self.replay_db = bob.db.replay.Database()
+        self.replaymobile_db = bob.db.replaymobile.Database()
+        self.msu_mfsd_db = bob.db.msu_mfsd_mod.Database()
+
+        # Since the high level API expects different group names than what the low
+        # level API offers, you need to convert them when necessary
+        self.low_level_group_names = ('train', 'devel', 'test') # group names in the low-level database interface
+        self.high_level_group_names = ('train', 'dev', 'eval') # names are expected to be like that in the objects() function
+
+        # Always use super to call parent class methods.
+        super(AggregatedDbPadDatabase, self).__init__(
+            name = 'aggregated_db',
+            protocol = protocol,
+            original_directory = original_directory,
+            original_extension = original_extension,
+            **kwargs)
+
+
+    #==========================================================================
+    def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
+        """
+        This function returns a list of AggregatedDbPadFile objects, which fulfill the given restrictions.
+
+        **Parameters:**
+
+        ``groups`` : :py:class:`str` or a list of strings
+            The groups for which the clients should be returned.
+            Usually, groups are one or more elements of ('train', 'dev', 'eval').
+
+        ``protocol`` : :py:class:`str`
+            The protocol for which the clients should be retrieved.
+            The protocol is dependent on your database.
+            If you do not have protocols defined, just ignore this field.
+
+        ``purposes`` : :py:class:`str` or a list of strings
+            The purposes for which File objects should be retrieved.
+            Usually it is either 'real' or 'attack'.
+
+        ``model_ids``
+            This parameter is not supported in PAD databases yet.
+
+        **Returns:**
+
+        ``files`` : [AggregatedDbPadFile]
+            A list of AggregatedDbPadFile objects.
+        """
+
+        # Convert group names to low-level group names here.
+        groups = self.convert_names_to_lowlevel(groups, self.low_level_group_names, self.high_level_group_names)
+        # Since this database was designed for PAD experiments, nothing special
+        # needs to be done here.
+        replay_files = self.replay_db.objects(protocol=protocol, groups=groups, cls=purposes, **kwargs)
+
+        replaymobile_files = self.replaymobile_db.objects(protocol=protocol, groups=groups, cls=purposes, **kwargs)
+
+        msu_mfsd_files = self.msu_mfsd_db.objects(group=groups, cls=purposes, **kwargs)
+
+        files = replay_files + replaymobile_files + msu_mfsd_files # append all files to a single list
+
+        files = [AggregatedDbPadFile(f) for f in files]
+        return files
+
+
+    #==========================================================================
+    def annotations(self, f):
+        """
+        Return annotations for a given file object ``f``, which is an instance
+        of ``AggregatedDbPadFile`` defined in the HLDI of the Aggregated DB.
+        The ``load()`` method of the ``AggregatedDbPadFile`` class (see above)
+        returns a video, therefore this method returns bounding-box annotations
+        for each video frame. The annotations are returned as a dictionary of dictionaries.
+
+        **Parameters:**
+
+        ``f`` : :py:class:`object`
+            An instance of ``AggregatedDbPadFile`` defined above.
+
+        **Returns:**
+
+        ``annotations`` : :py:class:`dict`
+            A dictionary containing the annotations for each frame in the video.
+            Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame2_dict, ...}``,
+            where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}``
+            is the dictionary defining the coordinates of the face bounding box in frame N.
+ """ + + import bob.db.replay + import bob.db.replaymobile + import bob.db.msu_mfsd_mod + + directories = self.original_directory.split(" ") + + if isinstance(f.f, bob.db.replay.models.File): # check if instance of File class of LLDI of Replay-Attack + + hldi_db = replay_hldi.ReplayPadDatabase(original_directory = directories[0]) + + if isinstance(f.f, bob.db.replaymobile.models.File): # check if instance of File class of LLDI of Replay-Mobile + + hldi_db = replay_mobile_hldi.ReplayMobilePadDatabase(original_directory = directories[1]) + + if isinstance(f.f, bob.db.msu_mfsd_mod.models.File): # check if instance of File class of LLDI of MSU MFSD + + hldi_db = msu_mfsd_hldi.MsuMfsdPadDatabase(original_directory = directories[2]) + + annotations = hldi_db.annotations(f) + + return annotations + + + + + + + + + + + + + + diff --git a/bob/pad/face/database/msu_mfsd.py b/bob/pad/face/database/msu_mfsd.py index 7b183441c2128e222c17725268c6e117328ef8dc..52c7fb8fca9e2ed4594ed465f9d8cba2d353bbd5 100644 --- a/bob/pad/face/database/msu_mfsd.py +++ b/bob/pad/face/database/msu_mfsd.py @@ -181,7 +181,7 @@ class MsuMfsdPadDatabase(PadDatabase): **Returns:** - ``files`` : :py:class:`str` + ``files`` : [MsuMfsdPadFile] A list of MsuMfsdPadFile objects. """ diff --git a/bob/pad/face/database/replay.py b/bob/pad/face/database/replay.py index 1d1a663548a40e4751f6564e68d55779fd816d41..d9656f3dd0639a73fcf698b1944a2b9687eae252 100644 --- a/bob/pad/face/database/replay.py +++ b/bob/pad/face/database/replay.py @@ -148,9 +148,10 @@ class ReplayPadDatabase(PadDatabase): **Returns:** - ``files`` : :py:class:`str` + ``files`` : [ReplayPadFile] A list of ReplayPadFile objects. """ + # Convert group names to low-level group names here. groups = self.convert_names_to_lowlevel(groups, self.low_level_group_names, self.high_level_group_names) # Since this database was designed for PAD experiments, nothing special diff --git a/bob/pad/face/database/replay_mobile.py b/bob/pad/face/database/replay_mobile.py index 773ca43d469e5341500948c12c2837faa61aa0ea..9745e181a9ef6090f0d9c38f0af8d42208bf9bdd 100644 --- a/bob/pad/face/database/replay_mobile.py +++ b/bob/pad/face/database/replay_mobile.py @@ -173,7 +173,7 @@ class ReplayMobilePadDatabase(PadDatabase): **Returns:** - ``files`` : :py:class:`str` + ``files`` : [ReplayMobilePadFile] A list of ReplayMobilePadFile objects. """ diff --git a/doc/api.rst b/doc/api.rst index 8f94169abe5406ee6bfa772b71ff72ff0c5e9153..3b834a8cd2fc9e468bba8b6c78293837ece98dd3 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -31,6 +31,12 @@ MSU MFSD Database .. autoclass:: bob.pad.face.database.msu_mfsd.MsuMfsdPadFile .. autoclass:: bob.pad.face.database.msu_mfsd.MsuMfsdPadDatabase +Aggregated Database +======================== + +.. autoclass:: bob.pad.face.database.aggregated_db.AggregatedDbPadFile +.. 
diff --git a/bob/pad/face/database/msu_mfsd.py b/bob/pad/face/database/msu_mfsd.py
index 7b183441c2128e222c17725268c6e117328ef8dc..52c7fb8fca9e2ed4594ed465f9d8cba2d353bbd5 100644
--- a/bob/pad/face/database/msu_mfsd.py
+++ b/bob/pad/face/database/msu_mfsd.py
@@ -181,7 +181,7 @@ class MsuMfsdPadDatabase(PadDatabase):
 
         **Returns:**
 
-        ``files`` : :py:class:`str`
+        ``files`` : [MsuMfsdPadFile]
             A list of MsuMfsdPadFile objects.
         """
 
diff --git a/bob/pad/face/database/replay.py b/bob/pad/face/database/replay.py
index 1d1a663548a40e4751f6564e68d55779fd816d41..d9656f3dd0639a73fcf698b1944a2b9687eae252 100644
--- a/bob/pad/face/database/replay.py
+++ b/bob/pad/face/database/replay.py
@@ -148,9 +148,10 @@ class ReplayPadDatabase(PadDatabase):
 
         **Returns:**
 
-        ``files`` : :py:class:`str`
+        ``files`` : [ReplayPadFile]
             A list of ReplayPadFile objects.
         """
+
         # Convert group names to low-level group names here.
         groups = self.convert_names_to_lowlevel(groups, self.low_level_group_names, self.high_level_group_names)
         # Since this database was designed for PAD experiments, nothing special
diff --git a/bob/pad/face/database/replay_mobile.py b/bob/pad/face/database/replay_mobile.py
index 773ca43d469e5341500948c12c2837faa61aa0ea..9745e181a9ef6090f0d9c38f0af8d42208bf9bdd 100644
--- a/bob/pad/face/database/replay_mobile.py
+++ b/bob/pad/face/database/replay_mobile.py
@@ -173,7 +173,7 @@ class ReplayMobilePadDatabase(PadDatabase):
 
         **Returns:**
 
-        ``files`` : :py:class:`str`
+        ``files`` : [ReplayMobilePadFile]
             A list of ReplayMobilePadFile objects.
         """
 
diff --git a/doc/api.rst b/doc/api.rst
index 8f94169abe5406ee6bfa772b71ff72ff0c5e9153..3b834a8cd2fc9e468bba8b6c78293837ece98dd3 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -31,6 +31,12 @@ MSU MFSD Database
 .. autoclass:: bob.pad.face.database.msu_mfsd.MsuMfsdPadFile
 .. autoclass:: bob.pad.face.database.msu_mfsd.MsuMfsdPadDatabase
 
+Aggregated Database
+========================
+
+.. autoclass:: bob.pad.face.database.aggregated_db.AggregatedDbPadFile
+.. autoclass:: bob.pad.face.database.aggregated_db.AggregatedDbPadDatabase
+
 
 Pre-processors
 ------------------------------
diff --git a/doc/baselines.rst b/doc/baselines.rst
index 80057a25df0a54a75ddaf3ec93105368fd2acabe..410fc0e4dfa0d775ee0db974be6fb68aea10d4b0 100644
--- a/doc/baselines.rst
+++ b/doc/baselines.rst
@@ -182,7 +182,7 @@ The EER/HTER errors for `replayattack`_ database are summarized in the Table bel
 +-------------------+----------+----------+
 |  Protocol         |  EER,\%  |  HTER,\% |
 +===================+==========+==========+
-|   ``grandtest``   |  4.321   |  4.570   |
+|   ``grandtest``   |  3.987   |  4.571   |
 +-------------------+----------+----------+
 
 The ROC curves for the particular experiment can be downloaded from here:
@@ -337,7 +337,7 @@ The EER/HTER errors for the `Replay-Mobile`_ database are summarized in the Tabl
 +-------------------+----------+----------+
 |  Protocol         |  EER,\%  |  HTER,\% |
 +===================+==========+==========+
-|   ``grandtest``   |  2.772   |  5.886   |
+|   ``grandtest``   |  1.747   |  4.074   |
 +-------------------+----------+----------+
 
 The ROC curves for the particular experiment can be downloaded from here:
@@ -493,7 +493,7 @@ The EER/HTER errors for the `MSU MFSD`_ database are summarized in the Table bel
 +-------------------+----------+----------+
 |  Protocol         |  EER,\%  |  HTER,\% |
 +===================+==========+==========+
-|   ``grandtest``   |  4.115   |  5.564   |
+|   ``grandtest``   |  3.665   |  4.944   |
 +-------------------+----------+----------+
 
 The ROC curves for the particular experiment can be downloaded from here:
@@ -552,5 +552,20 @@ The ROC curves for the particular experiment can be downloaded from here:
 
 ------------
 
+.. _bob.pad.face.baselines.aggregated_db:
+
+Baselines on the Aggregated Database
+--------------------------------------
+
+This section summarizes the results of baseline face PAD experiments on the Aggregated Database.
+The description of the database instance, which can be used to run face PAD experiments on the
+Aggregated Database, is given here: :ref:`bob.pad.face.resources.databases.aggregated_db`.
+To understand the settings of this database instance in more detail, check the
+corresponding configuration file: ``bob/pad/face/config/database/aggregated_db.py``.
+
+------------
+
+
 .. include:: links.rst
+
diff --git a/doc/img/ROC_iqm_svm_msu_mfsd.pdf b/doc/img/ROC_iqm_svm_msu_mfsd.pdf
index 740123c376d98fdee99f260e5afa59effe258c36..dd4e80931a21e166f6282bb400eff441fdec9949 100644
Binary files a/doc/img/ROC_iqm_svm_msu_mfsd.pdf and b/doc/img/ROC_iqm_svm_msu_mfsd.pdf differ
diff --git a/doc/img/ROC_iqm_svm_replay_attack.pdf b/doc/img/ROC_iqm_svm_replay_attack.pdf
index a4524065a5d3f1e41e547ae51f63859db6c505d9..9da9c0f5ce51f260a2612c2bc6e60bfb9f7cf1fe 100644
Binary files a/doc/img/ROC_iqm_svm_replay_attack.pdf and b/doc/img/ROC_iqm_svm_replay_attack.pdf differ
diff --git a/doc/img/ROC_iqm_svm_replay_mobile.pdf b/doc/img/ROC_iqm_svm_replay_mobile.pdf
index 57b62a2f8a85ad85301c7ac7b5ce127e0d489e29..b57b3a042c648fc601bf79a9d32bf2143f1c3e07 100644
Binary files a/doc/img/ROC_iqm_svm_replay_mobile.pdf and b/doc/img/ROC_iqm_svm_replay_mobile.pdf differ
diff --git a/doc/installation.rst b/doc/installation.rst
index 504281c7491a85b880760b82dcfd635b4ebe7a9f..52f756617690c0ce2db3b482dcb13f422ef63e72 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -57,8 +57,10 @@ to run the baselines.
 The current system readily supports the following freely available datasets:
 
-* ``replay-attack``: `replayattack`_
-
+* `REPLAYATTACK`_
+* `REPLAY-MOBILE`_
+* `MSU MFSD`_
+* ``Aggregated DB``
 
 After downloading the databases, annotate the base directories in which they
 are installed. Then, follow the instructions in
diff --git a/doc/resources.rst b/doc/resources.rst
index 9edada2e3441f26eedb7e24e27074caa51cd7715..2407852e7ffe3777d74e71c60e146f8fef74736d 100644
--- a/doc/resources.rst
+++ b/doc/resources.rst
@@ -9,7 +9,6 @@
 This section contains a listing of all ready-to-use resources you can find in
 this package.
 
-
 ---------------------------------
 
@@ -49,6 +48,14 @@ MSU MFSD Database
    :members:
 
 
+.. _bob.pad.face.resources.databases.aggregated_db:
+
+Aggregated Database
+================================================================================
+
+.. automodule:: bob.pad.face.config.database.aggregated_db
+   :members:
+
 
 ---------------------------------
 
@@ -91,3 +98,33 @@ Frame differences based features (motion analysis) + SVM for REPLAY-ATTACK
 
 .. automodule:: bob.pad.face.config.frame_diff_svm
    :members:
+
+
+.. _bob.pad.face.resources.face_pad.lbp_svm_aggregated_db:
+
+LBP features of facial region + SVM for Aggregated Database
+===================================================================================
+
+.. automodule:: bob.pad.face.config.lbp_svm_aggregated_db
+   :members:
+
+
+.. _bob.pad.face.resources.face_pad.qm_svm_aggregated_db:
+
+Image Quality Measures as features of facial region + SVM for Aggregated Database
+===================================================================================
+
+.. automodule:: bob.pad.face.config.qm_svm_aggregated_db
+   :members:
+
+
+.. _bob.pad.face.resources.face_pad.frame_diff_svm_aggregated_db:
+
+Frame differences based features (motion analysis) + SVM for Aggregated Database
+===================================================================================
+
+.. automodule:: bob.pad.face.config.frame_diff_svm_aggregated_db
+   :members:
diff --git a/setup.py b/setup.py
index eb04789aa236b9abd403cc422ef6a532df15c8d8..e0ac7ee6cbf6fbe56e0b6a9754bded38486ae3db 100644
--- a/setup.py
+++ b/setup.py
@@ -66,6 +66,7 @@ setup(
         'replay = bob.pad.face.config.database.replay:database',
         'replay-mobile = bob.pad.face.config.database.replay_mobile:database',
         'msu-mfsd = bob.pad.face.config.database.msu_mfsd:database',
+        'aggregated-db = bob.pad.face.config.database.aggregated_db:database',
       ],
 
       # registered configurations:
@@ -73,8 +74,11 @@ setup(
 
         # baselines:
         'lbp-svm = bob.pad.face.config.lbp_svm',
+        'lbp-svm-aggregated-db = bob.pad.face.config.lbp_svm_aggregated_db',
         'qm-svm = bob.pad.face.config.qm_svm',
+        'qm-svm-aggregated-db = bob.pad.face.config.qm_svm_aggregated_db',
         'frame-diff-svm = bob.pad.face.config.frame_diff_svm',
+        'frame-diff-svm-aggregated-db = bob.pad.face.config.frame_diff_svm_aggregated_db',
       ],
 
       # registered preprocessors:
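The entry points registered above can be enumerated and resolved with setuptools; a sketch, assuming the group is named ``bob.pad.database`` (the section header is not shown in this excerpt)::

    import pkg_resources

    # List the PAD database resources contributed by this package:
    for ep in pkg_resources.iter_entry_points('bob.pad.database'):
        print(ep.name)  # 'replay', 'replay-mobile', 'msu-mfsd', 'aggregated-db'

    # Resolve one of them to the `database` object defined in its config module:
    database = pkg_resources.load_entry_point(
        'bob.pad.face', 'bob.pad.database', 'aggregated-db')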