diff --git a/bob/pad/face/database/aggregated_db.py b/bob/pad/face/database/aggregated_db.py index 75e91b3291f7b83d0074b0bfedecbf5954301659..cc584490fcdd20d16f2caaa6c1c89122b8c8a67c 100644 --- a/bob/pad/face/database/aggregated_db.py +++ b/bob/pad/face/database/aggregated_db.py @@ -1,8 +1,8 @@ #!/usr/bin/env python2 # -*- coding: utf-8 -*- -#============================================================================== -from bob.pad.face.database import VideoPadFile # Used in ReplayPadFile class +# ============================================================================= +from bob.pad.face.database import VideoPadFile from bob.pad.base.database import PadDatabase @@ -19,8 +19,7 @@ from bob.bio.video import FrameSelector, FrameContainer import numpy as np - -#============================================================================== +# ============================================================================= class AggregatedDbPadFile(VideoPadFile): """ A high level implementation of the File class for the Aggregated Database @@ -76,7 +75,7 @@ class AggregatedDbPadFile(VideoPadFile): attack_type=attack_type, file_id=file_id) - #========================================================================== + # ========================================================================= def encode_file_id(self, f, n=2000): """ Return a modified version of the ``f.id`` ensuring uniqueness of the ids @@ -134,7 +133,7 @@ class AggregatedDbPadFile(VideoPadFile): return file_id - #========================================================================== + # ========================================================================= def encode_file_path(self, f): """ Append the name of the database to the end of the file path separated @@ -187,7 +186,7 @@ class AggregatedDbPadFile(VideoPadFile): return file_path - #========================================================================== + # ========================================================================= def 
load(self, directory=None, extension='.mov'): """ Overridden version of the load method defined in the ``VideoPadFile``. @@ -269,7 +268,7 @@ class AggregatedDbPadFile(VideoPadFile): return video_data # video data -#============================================================================== +# ============================================================================= class AggregatedDbPadDatabase(PadDatabase): """ A high level implementation of the Database class for the Aggregated Database @@ -304,11 +303,24 @@ class AggregatedDbPadDatabase(PadDatabase): databases Replay-Attack, Replay-Mobile, MSU MFSD plus some additional data from MOBIO dataset is used in the training set. - 5. "grandtest-train-eval" - - this protocol is using all the data available + 5. "grandtest-train-eval" - this protocol is using all the data available in the databases Replay-Attack, Replay-Mobile, MSU MFSD. Only two gropus 'train' and 'eval' are available in this protocol. The 'dev' set is concatenated to the training data. When requesting 'dev' set, the data of the 'eval' set is returned. + + 6. "grandtest-train-eval-<num_train_samples>" - + this protocol is using all the data available in the databases + Replay-Attack, Replay-Mobile, MSU MFSD. Only two gropus + 'train' and 'eval' are available in this protocol. The 'dev' set is + concatenated to the training data. When requesting 'dev' set, the + data of the 'eval' set is returned. + MOREOVER, in this protocol you can specify the number of training samples + <num_train_samples>, which will be uniformly selected for each database + (Replay-Attack, Replay-Mobile, MSU MFSD) used in the Aggregated DB. + For example, in the protocol "grandtest-train-eval-5", 5 training samples + will be selected for Replay-Attack, 5 for Replay-Mobile, and 5 for + MSU MFSD. The total number of training samples is 15 in this case. """ def __init__( @@ -334,7 +346,8 @@ class AggregatedDbPadDatabase(PadDatabase): in the HighLevel DB Interface of MSU MFSD. 
def uniform_select_list_elements(self, data, n_samples):
    """
    Uniformly select ``n_samples`` elements from the input data list.

    The list is split into ``n_samples + 1`` equal steps and the element
    closest to the end of each of the first ``n_samples`` steps is picked.
    As a consequence the very first element of ``data`` is never selected
    when sub-sampling takes place.

    **Parameters:**

    ``data`` : []
        Input list to select elements from.

    ``n_samples`` : :py:class:`int`
        The number of samples to be selected uniformly from the input list.

    **Returns:**

    ``selected_data`` : []
        Selected subset of elements. When ``data`` holds ``n_samples``
        elements or fewer, the input list itself is returned unchanged.
    """

    n_items = len(data)

    # Nothing to sub-sample: the request covers the whole list.
    if n_items <= n_samples:
        return data

    # ``np.float`` was a deprecated alias of the builtin ``float`` and has
    # been removed from recent NumPy releases, so the builtin is used here.
    # The explicit conversion keeps the division a true division even under
    # Python 2 integer semantics.
    uniform_step = n_items / float(n_samples + 1)

    # ``np.round`` is kept (instead of the builtin ``round``) so that ties
    # are rounded identically regardless of the Python version.
    idxs = [int(np.round(uniform_step * (x + 1)))
            for x in range(n_samples)]

    return [data[idx] for idx in idxs]
@@ -521,7 +581,8 @@ class AggregatedDbPadDatabase(PadDatabase): if protocol == 'photo-photo-video': - if groups == 'train' or groups == 'devel': # the group names are low-level here: ('train', 'devel', 'test') + # the group names are low-level here: ('train', 'devel', 'test') + if groups == 'train' or groups == 'devel': replay_files = self.replay_db.objects( protocol='photo', groups=groups, cls=purposes, **kwargs) @@ -559,7 +620,8 @@ class AggregatedDbPadDatabase(PadDatabase): if protocol == 'video-video-photo': - if groups == 'train' or groups == 'devel': # the group names are low-level here: ('train', 'devel', 'test') + # the group names are low-level here: ('train', 'devel', 'test') + if groups == 'train' or groups == 'devel': replay_files = self.replay_db.objects( protocol='video', groups=groups, cls=purposes, **kwargs) @@ -611,45 +673,59 @@ class AggregatedDbPadDatabase(PadDatabase): mobio_files = self.get_mobio_files_given_single_group( groups=groups, purposes=purposes) - if protocol == 'grandtest-train-eval': + if protocol is not None: - if groups == 'train': + if 'grandtest-train-eval' in protocol: - replay_files = self.replay_db.objects( - protocol='grandtest', - groups=['train', 'devel'], - cls=purposes, - **kwargs) + if groups == 'train': - replaymobile_files = self.replaymobile_db.objects( - protocol='grandtest', - groups=['train', 'devel'], - cls=purposes, - **kwargs) + replay_files = self.replay_db.objects( + protocol='grandtest', + groups=['train', 'devel'], + cls=purposes, + **kwargs) - msu_mfsd_files = self.msu_mfsd_db.objects( - group=['train', 'devel'], cls=purposes, **kwargs) + replaymobile_files = self.replaymobile_db.objects( + protocol='grandtest', + groups=['train', 'devel'], + cls=purposes, + **kwargs) - if groups in ['devel', 'test']: + msu_mfsd_files = self.msu_mfsd_db.objects( + group=['train', 'devel'], cls=purposes, **kwargs) - replay_files = self.replay_db.objects( - protocol='grandtest', - groups='test', - cls=purposes, - **kwargs) + 
if len(protocol) > len('grandtest-train-eval'): - replaymobile_files = self.replaymobile_db.objects( - protocol='grandtest', - groups='test', - cls=purposes, - **kwargs) + num_train_samples = [ + int(s) for s in protocol.split("-") if s.isdigit()][-1] - msu_mfsd_files = self.msu_mfsd_db.objects( - group='test', cls=purposes, **kwargs) + replay_files = self.uniform_select_list_elements( + data=replay_files, n_samples=num_train_samples) + replaymobile_files = self.uniform_select_list_elements( + data=replaymobile_files, n_samples=num_train_samples) + msu_mfsd_files = self.uniform_select_list_elements( + data=msu_mfsd_files, n_samples=num_train_samples) + + if groups in ['devel', 'test']: + + replay_files = self.replay_db.objects( + protocol='grandtest', + groups='test', + cls=purposes, + **kwargs) + + replaymobile_files = self.replaymobile_db.objects( + protocol='grandtest', + groups='test', + cls=purposes, + **kwargs) + + msu_mfsd_files = self.msu_mfsd_db.objects( + group='test', cls=purposes, **kwargs) return replay_files, replaymobile_files, msu_mfsd_files, mobio_files - #========================================================================== + # ========================================================================= def get_files_given_groups(self, groups=None, protocol=None, @@ -700,12 +776,25 @@ class AggregatedDbPadDatabase(PadDatabase): databases Replay-Attack, Replay-Mobile, MSU MFSD plus some additional data from MOBIO dataset is used in the training set. - 5. "grandtest-train-eval" - - this protocol is using all the data available + 5. "grandtest-train-eval" - this protocol is using all the data available in the databases Replay-Attack, Replay-Mobile, MSU MFSD. Only two gropus 'train' and 'test' are available in this protocol. The 'devel' set is concatenated to the training data. When requesting 'devel' set, the data of the 'test' set is returned. + 6. 
"grandtest-train-eval-<num_train_samples>" - + this protocol is using all the data available in the databases + Replay-Attack, Replay-Mobile, MSU MFSD. Only two gropus + 'train' and 'eval' are available in this protocol. The 'dev' set is + concatenated to the training data. When requesting 'dev' set, the + data of the 'eval' set is returned. + MOREOVER, in this protocol you can specify the number of training samples + <num_train_samples>, which will be uniformly selected for each database + (Replay-Attack, Replay-Mobile, MSU MFSD) used in the Aggregated DB. + For example, in the protocol "grandtest-train-eval-5", 5 training samples + will be selected for Replay-Attack, 5 for Replay-Mobile, and 5 for + MSU MFSD. The total number of training samples is 15 in this case. + ``purposes`` : :py:class:`str` OR a list of strings. The purposes for which File objects should be retrieved. @@ -761,7 +850,7 @@ class AggregatedDbPadDatabase(PadDatabase): return replay_files, replaymobile_files, msu_mfsd_files, mobio_files - #========================================================================== + # ========================================================================= def objects(self, groups=None, protocol=None, @@ -808,6 +897,25 @@ class AggregatedDbPadDatabase(PadDatabase): databases Replay-Attack, Replay-Mobile, MSU MFSD plus some additional data from MOBIO dataset is used in the training set. + 5. "grandtest-train-eval" - this protocol is using all the data available + in the databases Replay-Attack, Replay-Mobile, MSU MFSD. Only two gropus + 'train' and 'eval' are available in this protocol. The 'dev' set is + concatenated to the training data. When requesting 'dev' set, the + data of the 'eval' set is returned. + + 6. "grandtest-train-eval-<num_train_samples>" - + this protocol is using all the data available in the databases + Replay-Attack, Replay-Mobile, MSU MFSD. Only two gropus + 'train' and 'eval' are available in this protocol. 
The 'dev' set is + concatenated to the training data. When requesting 'dev' set, the + data of the 'eval' set is returned. + MOREOVER, in this protocol you can specify the number of training samples + <num_train_samples>, which will be uniformly selected for each database + (Replay-Attack, Replay-Mobile, MSU MFSD) used in the Aggregated DB. + For example, in the protocol "grandtest-train-eval-5", 5 training samples + will be selected for Replay-Attack, 5 for Replay-Mobile, and 5 for + MSU MFSD. The total number of training samples is 15 in this case. + ``purposes`` : :py:class:`str` OR a list of strings. The purposes for which File objects should be retrieved. @@ -835,26 +943,22 @@ class AggregatedDbPadDatabase(PadDatabase): model_ids=model_ids, **kwargs) - # replay_files = self.replay_db.objects(protocol=protocol, groups=groups, cls=purposes, **kwargs) - # - # replaymobile_files = self.replaymobile_db.objects(protocol=protocol, groups=groups, cls=purposes, **kwargs) - # - # msu_mfsd_files = self.msu_mfsd_db.objects(group=groups, cls=purposes, **kwargs) - - files = replay_files + replaymobile_files + msu_mfsd_files + mobio_files # append all files to a single list + files = replay_files + replaymobile_files + msu_mfsd_files + \ + mobio_files # append all files to a single list files = [AggregatedDbPadFile(f) for f in files] return files - #========================================================================== + # ========================================================================= def annotations(self, f): """ Return annotations for a given file object ``f``, which is an instance of ``AggregatedDbPadFile`` defined in the HLDI of the Aggregated DB. The ``load()`` method of ``AggregatedDbPadFile`` class (see above) returns a video, therefore this method returns bounding-box annotations - for each video frame. The annotations are returned as dictionary of dictionaries. + for each video frame. The annotations are returned as dictionary of + dictionaries. 
**Parameters:**