From e9a2161897c42b5cb1ba45ca00826f4ac25bcff2 Mon Sep 17 00:00:00 2001 From: Olegs NIKISINS <onikisins@italix03.idiap.ch> Date: Tue, 11 Jul 2017 13:59:29 +0200 Subject: [PATCH] Added HLDI for the MSU MFSD database + congig file + unit tests --- bob/pad/face/config/database/msu_mfsd.py | 48 +++++ bob/pad/face/database/__init__.py | 2 + bob/pad/face/database/msu_mfsd.py | 235 +++++++++++++++++++++ bob/pad/face/preprocessor/VideoFaceCrop.py | 2 +- bob/pad/face/test/test_databases.py | 21 +- setup.py | 1 + test-requirements.txt | 1 + 7 files changed, 308 insertions(+), 2 deletions(-) create mode 100644 bob/pad/face/config/database/msu_mfsd.py create mode 100644 bob/pad/face/database/msu_mfsd.py diff --git a/bob/pad/face/config/database/msu_mfsd.py b/bob/pad/face/config/database/msu_mfsd.py new file mode 100644 index 00000000..7dcef4b9 --- /dev/null +++ b/bob/pad/face/config/database/msu_mfsd.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +"""`MSU MFSD`_ is a database for face PAD experiments. + +Database created at MSU, for face-PAD experiments. The public version of the database (available here) contains +280 videos corresponding to 35 clients. The videos are grouped as 'genuine' and 'attack'. +The attack videos have been constructed from the genuine ones, +and consist of three kinds: print, iPad (video-replay), and iPhone (video-replay). +Face-locations are also provided for each frame of each video, but some (6 videos) face-locations are not reliable, +because the videos are not correctly oriented. +The reference citation is [XXX]_. + +You can download the raw data of the `MSU MFSD`_ database by following +the link. + +.. include:: links.rst +""" + +from bob.pad.face.database import MsuMfsdPadDatabase + + +# Directory where the data files are stored. 
+# This directory is given in the .bob_bio_databases.txt file located in your home directory +original_directory = "[YOUR_MSU_MFSD_DIRECTORY]" +"""Value of ``~/.bob_bio_databases.txt`` for this database""" + +original_extension = "none" # extension is not used to load the data in the HLDI of this database + +database = MsuMfsdPadDatabase( + protocol='grandtest', + original_directory=original_directory, + original_extension=original_extension, + training_depends_on_protocol=True, +) +"""The :py:class:`bob.pad.base.database.PadDatabase` derivative with MSU MFSD +database settings. + +.. warning:: + + This class only provides a programmatic interface to load data in an orderly + manner, respecting usage protocols. It does **not** contain the raw + data files. You should procure those yourself. + +Notice that ``original_directory`` is set to ``[YOUR_MSU_MFSD_DIRECTORY]``. +You must make sure to create ``${HOME}/.bob_bio_databases.txt`` setting this +value to the place where you actually installed the MSU MFSD Database, as +explained in the section :ref:`bob.pad.face.baselines`. 
+""" \ No newline at end of file diff --git a/bob/pad/face/database/__init__.py b/bob/pad/face/database/__init__.py index 7b20112a..abf2f691 100644 --- a/bob/pad/face/database/__init__.py +++ b/bob/pad/face/database/__init__.py @@ -1,5 +1,6 @@ from .replay import ReplayPadDatabase from .replay_mobile import ReplayMobilePadDatabase +from .msu_mfsd import MsuMfsdPadDatabase # gets sphinx autodoc done right - don't remove it def __appropriate__(*args): @@ -18,6 +19,7 @@ def __appropriate__(*args): __appropriate__( ReplayPadDatabase, ReplayMobilePadDatabase, + MsuMfsdPadDatabase, ) __all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/pad/face/database/msu_mfsd.py b/bob/pad/face/database/msu_mfsd.py new file mode 100644 index 00000000..7b183441 --- /dev/null +++ b/bob/pad/face/database/msu_mfsd.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- + + +#============================================================================== +import bob.bio.video # Used in MsuMfsdPadFile class + +from bob.pad.base.database import PadFile # Used in MsuMfsdPadFile class + +from bob.pad.base.database import PadDatabase + +import os + +import numpy as np + +#============================================================================== +class MsuMfsdPadFile(PadFile): + """ + A high level implementation of the File class for the MSU MFSD database. + """ + + def __init__(self, f): + """ + **Parameters:** + + ``f`` : :py:class:`object` + An instance of the File class defined in the low level db interface + of the MSU MFSD database, in the bob.db.msu_mfsd_mod.models.py file. + """ + + self.f = f + # this f is actually an instance of the File class that is defined in + # bob.db.msu_mfsd_mod.models and the PadFile class here needs + # client_id, path, attack_type, file_id for initialization. We have to + # convert information here and provide them to PadFile. attack_type is a + # little tricky to get here. 
Based on the documentation of PadFile: + # In case of spoofed data, this parameter should indicate what kind of spoofed attack it is. + # The default None value means that the PadFile is a genuine (real) sample. + if f.is_real(): + attack_type = None + else: + attack_type = 'attack' + # attack_type is a string and I decided to make it like this for this + # particular database. You can do whatever you want for your own database. + + super(MsuMfsdPadFile, self).__init__(client_id=f.client_id, path=f.path, + attack_type=attack_type, file_id=f.id) + + + #========================================================================== + def convert_arr_to_frame_cont(self, data): + """ + This function converts an input 4D array with frames into FrameContainer, + where each frame is an RGB image. The dimensionality of the input array + is [N_frames, 3, N_rows, N_cols]. + + **Parameters:** + + ``data`` : 4D :py:class:`numpy.ndarray` + An input 4D array with frames of the dimensionality: + [N_frames, 3, N_rows, N_cols]. + + **Returns:** + + ``frames`` : FrameContainer + Resulting FrameContainer containing RGB frames. + """ + + frames = bob.bio.video.FrameContainer() # initialize the FrameContainer + + for idx, sample in enumerate(data): + + frames.add(idx, sample) + + return frames + + + #========================================================================== + def load(self, directory=None, extension=None): + """ + Overridden version of the load method defined in the ``PadFile``. + + **Parameters:** + + ``directory`` : :py:class:`str` + String containing the path to the MSU MFSD database. + Default: None + + ``extension`` : :py:class:`str` + Extension of the video files in the MSU MFSD database. + Note: ``extension`` value is not used in the code of this method. + Default: None + + **Returns:** + + ``video_data`` : FrameContainer + Video data stored in the FrameContainer, see ``bob.bio.video.utils.FrameContainer`` + for further details. 
+ """ + + _, extension = os.path.splitext(self.f.videofile()) # get file extension + + video_data_array = self.f.load(directory = directory, + extension = extension) + + video_data = self.convert_arr_to_frame_cont(video_data_array) # the result is now a FrameContainer + + return video_data + + +#============================================================================== +class MsuMfsdPadDatabase(PadDatabase): + """ + A high level implementation of the Database class for the MSU MFSD database. + """ + + def __init__( + self, + protocol='grandtest', # grandtest is the default protocol for this database + original_directory=None, + original_extension=None, + **kwargs): + """ + **Parameters:** + + ``protocol`` : :py:class:`str` or ``None`` + The name of the protocol that defines the default experimental setup for this database. + + ``original_directory`` : :py:class:`str` + The directory where the original data of the database are stored. + + ``original_extension`` : :py:class:`str` + The file name extension of the original data. + + ``kwargs`` + The arguments of the :py:class:`bob.pad.base.database.PadDatabase` base class constructor. + """ + + from bob.db.msu_mfsd_mod import Database as LowLevelDatabase + + self.db = LowLevelDatabase() + + # Since the high level API expects different group names than what the low + # level API offers, you need to convert them when necessary + self.low_level_group_names = ('train', 'devel', 'test') # group names in the low-level database interface + self.high_level_group_names = ('train', 'dev', 'eval') # names are expected to be like that in objects() function + + # Always use super to call parent class methods. 
+ super(MsuMfsdPadDatabase, self).__init__( + name = 'msu-mfsd', + protocol = protocol, + original_directory = original_directory, + original_extension = original_extension, + **kwargs) + + + #========================================================================== + def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): + """ + This function returns a list of MsuMfsdPadFile objects, which fulfill the given restrictions. + + Keyword parameters: + + ``groups`` : :py:class:`str` + OR a list of strings. + The groups of which the clients should be returned. + Usually, groups are one or more elements of ('train', 'dev', 'eval') + + ``protocol`` : :py:class:`str` + The protocol for which the clients should be retrieved. + Note: this argument is not used in the code, because ``objects`` method of the + low-level DB interface of the MSU MFSD doesn't have ``protocol`` argument. + + ``purposes`` : :py:class:`str` + OR a list of strings. + The purposes for which File objects should be retrieved. + Usually it is either 'real' or 'attack'. + + ``model_ids`` + This parameter is not supported in PAD databases yet. + + **Returns:** + + ``files`` : :py:class:`list` + A list of MsuMfsdPadFile objects. + """ + + # Convert group names to low-level group names here. + groups = self.convert_names_to_lowlevel(groups, self.low_level_group_names, self.high_level_group_names) + # Since this database was designed for PAD experiments, nothing special + # needs to be done here. + files = self.db.objects(group=groups, cls=purposes, **kwargs) + + files = [MsuMfsdPadFile(f) for f in files] + + return files + + + #========================================================================== + def annotations(self, f): + """ + Return annotations for a given file object ``f``, which is an instance + of ``MsuMfsdPadFile`` defined in the HLDI of the MSU MFSD DB. 
+ The ``load()`` method of ``MsuMfsdPadFile`` class (see above) + returns a video, therefore this method returns bounding-box annotations + for each video frame. The annotations are returned as a dictionary of dictionaries. + + **Parameters:** + + ``f`` : :py:class:`object` + An instance of ``MsuMfsdPadFile`` defined above. + + **Returns:** + + ``annotations`` : :py:class:`dict` + A dictionary containing the annotations for each frame in the video. + Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame2_dict, ...}``. + Where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}`` + is the dictionary defining the coordinates of the face bounding box in frame N. + """ + + annots = f.f.bbx(directory=self.original_directory) # numpy array containing the face bounding box data for each video frame, returned data format described in the f.bbx() method of the low level interface + + annotations = {} # dictionary to return + + for frame_annots in annots: + + topleft = ( np.int( frame_annots[2] ), np.int( frame_annots[1] ) ) + bottomright = ( np.int( frame_annots[2] + frame_annots[4] ), np.int( frame_annots[1] + frame_annots[3] ) ) + + annotations[str( np.int( frame_annots[0] ) )] = {'topleft': topleft, 'bottomright': bottomright} + + return annotations + + diff --git a/bob/pad/face/preprocessor/VideoFaceCrop.py b/bob/pad/face/preprocessor/VideoFaceCrop.py index 02a4371a..85c559e2 100644 --- a/bob/pad/face/preprocessor/VideoFaceCrop.py +++ b/bob/pad/face/preprocessor/VideoFaceCrop.py @@ -165,7 +165,7 @@ class VideoFaceCrop(Preprocessor, object): selected_frame_idx = 0 - for idx in range(0, len(annotations)): # idx - frame index + for idx in range(0, np.min( [len(annotations), len(frame_container)] )): # idx - frame index frame_annotations = annotations[str(idx)] # annotations for particular frame diff --git a/bob/pad/face/test/test_databases.py b/bob/pad/face/test/test_databases.py index 68a61e4e..6c628bd2 100644 --- 
a/bob/pad/face/test/test_databases.py +++ b/bob/pad/face/test/test_databases.py @@ -38,4 +38,23 @@ def test_replaymobile(): except IOError as e: raise SkipTest( - "The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'" % e) \ No newline at end of file + "The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'" % e) + + +@db_available('msu_mfsd_mod') # the name of the package defining low-level interface of MSU MFSD +def test_msu_mfsd(): + msu_mfsd = bob.bio.base.load_resource('msu-mfsd', 'database', preferred_package='bob.pad.face', package_prefix='bob.pad.') + try: + + assert len( msu_mfsd.objects(groups=['train', 'dev', 'eval']) )== 280 + assert len( msu_mfsd.objects(groups=['train', 'dev']) ) == 160 + assert len( msu_mfsd.objects(groups=['train']) ) == 80 + assert len( msu_mfsd.objects(groups=['train', 'dev', 'eval'], protocol = 'grandtest') )== 280 + assert len( msu_mfsd.objects(groups=['train', 'dev', 'eval'], protocol = 'grandtest', purposes='real') ) == 70 + assert len( msu_mfsd.objects(groups=['train', 'dev', 'eval'], protocol = 'grandtest', purposes='attack') ) == 210 + + except IOError as e: + raise SkipTest( + "The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'" % e) + + diff --git a/setup.py b/setup.py index 27af5799..db586b2b 100644 --- a/setup.py +++ b/setup.py @@ -97,6 +97,7 @@ setup( 'bob.pad.database': [ 'replay = bob.pad.face.config.database.replay:database', 'replay-mobile = bob.pad.face.config.database.replay_mobile:database', + 'msu-mfsd = bob.pad.face.config.database.msu_mfsd:database', ], # registered configurations: diff --git a/test-requirements.txt b/test-requirements.txt index 6a55b2f9..f7509181 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,3 @@ bob.db.replay bob.db.replaymobile +bob.db.msu_mfsd_mod \ No newline at end of file -- GitLab