Commit 7b83407b authored by Amir MOHAMMADI

convert databases to use the new csv format from bob.pipelines

parent f83ac86a
Pipeline #49011 failed with stage
in 7 minutes and 12 seconds
......@@ -3,5 +3,5 @@ Please run ``bob config set bob.db.casia_fasd.directory /path/to/casia_fasd_file
in terminal to point to the original files of the dataset on your computer."""
from bob.pad.face.database import CasiaFasdPadDatabase
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
database = DatabaseConnector(CasiaFasdPadDatabase())
database = CasiaFasdPadDatabase()
from bob.pad.face.database import CasiaSurfPadDatabase
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
database = DatabaseConnector(CasiaSurfPadDatabase())
database = CasiaSurfPadDatabase()
from bob.pad.face.database import CasiaSurfPadDatabase
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
database = DatabaseConnector(CasiaSurfPadDatabase())
database = CasiaSurfPadDatabase(stream_type="color")
"""`CELEBA`_ is a face makeup spoofing database adapted for face PAD experiments.
You can download the raw data of the `CELEBA`_ database by following
the link.
.. include:: links.rst
"""
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
from bob.pad.face.database.celeb_a import CELEBAPadDatabase
database = DatabaseConnector(CELEBAPadDatabase())
from bob.pad.face.database import MaskAttackPadDatabase
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
database = DatabaseConnector(MaskAttackPadDatabase())
database = MaskAttackPadDatabase()
"""`MIFS`_ is a face makeup spoofing database adapted for face PAD experiments.
Database assembled from a dataset consisting of 107 makeup-transformations taken
from random YouTube makeup video tutorials, adapted in this package for face-PAD
experiments. The public version of the database contains 107 such transformations,
each with two images of a subject before makeup, two images of the same
subject after makeup, and two images of the target identity. For this package, a
subset of 104 makeup transformations is selected, the target identities' images
are discarded, and the remaining images are randomly distributed in three sets. More
information can be found in the reference [CDSR17]_.
You can download the raw data of the `MIFS`_ database by following
the link.
.. include:: links.rst
"""
from bob.pad.face.database import MIFSPadDatabase
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
database = DatabaseConnector(MIFSPadDatabase())
......@@ -2,9 +2,7 @@ from .database import VideoPadFile
from .database import VideoPadSample # noqa: F401
from .casiafasd import CasiaFasdPadDatabase
from .casiasurf import CasiaSurfPadDatabase
from .celeb_a import CELEBAPadDatabase
from .maskattack import MaskAttackPadDatabase
from .mifs import MIFSPadDatabase
from .replay_attack import ReplayAttackPadDatabase
from .replay_mobile import ReplayMobilePadDatabase
......@@ -29,8 +27,6 @@ __appropriate__(
VideoPadFile,
ReplayAttackPadDatabase,
ReplayMobilePadDatabase,
MIFSPadDatabase,
CELEBAPadDatabase,
MaskAttackPadDatabase,
CasiaSurfPadDatabase,
CasiaFasdPadDatabase,
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
from bob.extension import rc
from bob.io.video import reader
from bob.pad.base.database import PadDatabase
from bob.pad.face.database import VideoPadFile
from bob.db.base.utils import (
check_parameter_for_validity, check_parameters_for_validity)
from bob.db.base.annotations import read_annotation_file
from bob.ip.facedetect import expected_eye_positions, BoundingBox
import numpy
import os
CASIA_FASD_FRAME_SHAPE = (3, 1280, 720)
class CasiaFasdPadFile(VideoPadFile):
    """
    A high level implementation of the File class for the CASIA_FASD database.
    """

    def __init__(self, f, original_directory=None, annotation_directory=None):
        """
        Parameters
        ----------
        f : object
            An instance of the File class defined in the low level db interface
            of the CasiaFasd database, in bob.db.casia_fasd.models
        original_directory : str or None
            Directory used to build the path of the ``.avi`` video in
            :any:`frames` and :any:`number_of_frames`.
        annotation_directory : str or None
            When not None, :any:`annotations` reads ``.json`` files from this
            directory instead of using the bounding boxes returned by
            ``f.bbx()``.
        """
        self.f = f
        self.original_directory = original_directory
        self.annotation_directory = annotation_directory

        # bona fide samples carry no attack type; attacks are labelled by
        # their type and quality as reported by the low-level file
        if f.is_real():
            attack_type = None
        else:
            attack_type = 'attack/{}/{}'.format(f.get_type(), f.get_quality())

        super(CasiaFasdPadFile, self).__init__(
            client_id=str(f.get_clientid()),
            path=f.filename,
            attack_type=attack_type,
            file_id=f.filename)

    @property
    def frames(self):
        """Yields the frames of the biofile one by one.

        Every frame is zero-padded at the end of its height and width axes so
        that all yielded frames have the shape :any:`CASIA_FASD_FRAME_SHAPE`.

        Yields
        ------
        :any:`numpy.array`
            A frame of the video. The size is :any:`CASIA_FASD_FRAME_SHAPE`.

        Raises
        ------
        ValueError
            If a frame is larger than :any:`CASIA_FASD_FRAME_SHAPE` and hence
            cannot be padded to that shape.
        """
        vfilename = self.make_path(
            directory=self.original_directory, extension='.avi')
        H, W = CASIA_FASD_FRAME_SHAPE[1:]
        for frame in reader(vfilename):
            # pad frames to 1280 x 720 so they all have the same size
            h, w = frame.shape[1:]
            # explicit check instead of ``assert`` so that it is not stripped
            # when Python runs with -O
            if h > H or w > W:
                raise ValueError(
                    'Frame of shape {} in {} is larger than the supported '
                    'shape {}'.format(
                        frame.shape, vfilename, CASIA_FASD_FRAME_SHAPE))
            frame = numpy.pad(frame, ((0, 0), (0, H - h), (0, W - w)),
                              mode='constant', constant_values=0)
            yield frame

    @property
    def number_of_frames(self):
        """Returns the number of frames in a video file.

        Returns
        -------
        int
            The number of frames.
        """
        vfilename = self.make_path(
            directory=self.original_directory, extension='.avi')
        return reader(vfilename).number_of_frames

    @property
    def frame_shape(self):
        """Returns the size of each frame in this database.

        Returns
        -------
        (int, int, int)
            The (#Channels, Height, Width) which is
            :any:`CASIA_FASD_FRAME_SHAPE`.
        """
        return CASIA_FASD_FRAME_SHAPE

    @property
    def annotations(self):
        """Reads the annotations

        If the file object has an attribute of annotation_directory, it will
        read annotations from there instead of loading annotations that are
        shipped with the database.

        Returns
        -------
        annotations : :py:class:`dict`
            A dictionary containing the annotations for each frame in the
            video. Keys are the zero-based positions of the bounding boxes as
            strings:
            ``annotations = {'0': frame0_dict, '1': frame1_dict, ...}``. Where
            ``frameN_dict = {'topleft': (row, col), 'bottomright':(row, col)}``
            is the dictionary defining the coordinates of the face bounding
            box in frame N, extended with the output of
            ``expected_eye_positions`` for that bounding box.
        """
        if self.annotation_directory is not None:
            path = self.make_path(self.annotation_directory, extension=".json")
            return read_annotation_file(path, annotation_type="json")

        annots = self.f.bbx()
        annotations = {}
        for i, v in enumerate(annots):
            # NOTE(review): presumably each row is
            # (frame, x, y, width, height) -- confirm against
            # bob.db.casia_fasd
            topleft = (v[2], v[1])
            bottomright = (v[2] + v[4], v[1] + v[3])
            annotations[str(i)] = {'topleft': topleft,
                                   'bottomright': bottomright}
            size = (bottomright[0] - topleft[0], bottomright[1] - topleft[1])
            bounding_box = BoundingBox(topleft, size)
            annotations[str(i)].update(expected_eye_positions(bounding_box))
        return annotations
class CasiaFasdPadDatabase(PadDatabase):
    """
    A high level implementation of the Database class for the CASIA_FASD
    database. Please run ``bob config set bob.db.casia_fasd.directory
    /path/to/casia_fasd_files`` in a terminal to point to the original files on
    your computer. This interface is different from the one implemented in
    ``bob.db.casia_fasd.Database``.
    """

    def __init__(
            self,
            # grandtest is the new modified protocol for this database
            protocol='grandtest',
            original_directory=rc['bob.db.casia_fasd.directory'],
            annotation_directory=None,
            **kwargs):
        """
        Parameters
        ----------
        protocol : str or None
            The name of the protocol that defines the default experimental
            setup for this database. Only grandtest is supported for now.
        original_directory : str
            The directory where the original data of the database are stored.
        annotation_directory : str or None
            Optional directory with ``.json`` annotation files; forwarded to
            the base class and to every file returned by :any:`objects`.
        kwargs
            The arguments of the :py:class:`bob.pad.base.database.PadDatabase`
            base class constructor.
        """
        # __init__ must not return a value, so the result is not returned
        super(CasiaFasdPadDatabase, self).__init__(
            name='casiafasd',
            protocol=protocol,
            original_directory=original_directory,
            original_extension='.avi',
            annotation_directory=annotation_directory,
            training_depends_on_protocol=True,
            **kwargs)

    def objects(self,
                groups=None,
                protocol=None,
                purposes=None,
                model_ids=None,
                **kwargs):
        """
        This function returns lists of CasiaFasdPadFile objects, which fulfill
        the given restrictions.

        Parameters
        ----------
        groups : :obj:`str` or [:obj:`str`]
            The groups of which the clients should be returned.
            Usually, groups are one or more elements of
            ('train', 'dev', 'eval')
        protocol : str
            The protocol for which the clients should be retrieved.
            Only 'grandtest' is supported for now. This protocol modifies the
            'Overall Test' protocol and adds some ids to dev set.
        purposes : :obj:`str` or [:obj:`str`]
            The purposes for which File objects should be retrieved either
            'real' or 'attack' or both.
        model_ids
            Ignored.
        **kwargs
            Ignored.

        Returns
        -------
        files : [CasiaFasdPadFile]
            A list of CasiaFasdPadFile objects.
        """
        groups = check_parameters_for_validity(
            groups, 'groups', ('train', 'dev', 'eval'),
            ('train', 'dev', 'eval'))
        # a one-element tuple: the bare string ('grandtest') would turn the
        # validity check into a substring test
        protocol = check_parameter_for_validity(
            protocol, 'protocol', ('grandtest',), 'grandtest')
        purposes = check_parameters_for_validity(
            purposes, 'purposes', ('real', 'attack'), ('real', 'attack'))
        qualities = ('low', 'normal', 'high')
        types = ('warped', 'cut', 'video')
        from bob.db.casia_fasd.models import File

        files = []

        # maps "<kind>_<quality>" to the video file stem inside a client
        # folder
        db_mappings = {
            'real_normal': '1',
            'real_low': '2',
            'real_high': 'HR_1',
            'warped_normal': '3',
            'warped_low': '4',
            'warped_high': 'HR_2',
            'cut_normal': '5',
            'cut_low': '6',
            'cut_high': 'HR_3',
            'video_normal': '7',
            'video_low': '8',
            'video_high': 'HR_4'
        }

        # identities 1-15 are for train, 16-20 are dev, and 21-50 for eval
        grp_id_map = {
            'train': list(range(1, 16)),
            'dev': list(range(16, 21)),
            'eval': list(range(21, 51)),
        }
        # the original release only has train and test folders; dev is carved
        # out of the train folder
        grp_map = {
            'train': 'train',
            'dev': 'train',
            'eval': 'test',
        }

        for g in groups:
            # this group name (grp) is only train and test
            grp = grp_map[g]
            folder_name = grp + '_release'
            for i in grp_id_map[g]:
                # the id within the group subset: eval identities 21-50 are
                # numbered 1-30 in the test folder
                cur_id = i - 20 if g == 'eval' else i
                for q in qualities:
                    for c in purposes:
                        # the class real doesn't have any different types,
                        # only the attacks can be of different type
                        kinds = ('real',) if c == 'real' else types
                        for t in kinds:
                            filename = os.path.join(
                                folder_name, "%d" % cur_id,
                                db_mappings[t + '_' + q])
                            # real and attack files get the same directories,
                            # so custom annotations apply to both
                            files.append(CasiaFasdPadFile(
                                File(filename, c, grp),
                                original_directory=self.original_directory,
                                annotation_directory=self.annotation_directory))
        return files

    def annotations(self, padfile):
        """Returns the annotations of ``padfile`` (see its ``annotations``)."""
        return padfile.annotations

    def frames(self, padfile):
        """Returns the frame generator of ``padfile`` (see its ``frames``)."""
        return padfile.frames

    def number_of_frames(self, padfile):
        """Returns the number of frames of the video behind ``padfile``."""
        return padfile.number_of_frames

    @property
    def frame_shape(self):
        """Returns :any:`CASIA_FASD_FRAME_SHAPE`, the shape of all frames."""
        return CASIA_FASD_FRAME_SHAPE
from bob.extension.download import get_file
from bob.pad.base.database import FileListPadDatabase
from bob.pad.face.database import VideoPadSample
logger = logging.getLogger(__name__)
def CasiaFasdPadDatabase(
    selection_style=None,
    max_number_of_frames=None,
    step_size=None,
    annotation_directory=None,
    annotation_type=None,
    fixed_positions=None,
    **kwargs,
):
    """Creates the file-list based CASIA-FASD PAD database.

    Parameters
    ----------
    selection_style, max_number_of_frames, step_size
        Forwarded unchanged to ``VideoPadSample``.
    annotation_directory
        Forwarded to ``VideoPadSample``.
    annotation_type, fixed_positions
        Stored as attributes of the same name on the returned database.
    **kwargs
        Forwarded to ``FileListPadDatabase``.

    Returns
    -------
    FileListPadDatabase
        The database configured with the ``grandtest`` protocol.
    """
    # the protocol definitions are obtained through get_file under this name
    name = "pad-face-casia-fasd-e00ce410.tar.gz"
    protocols_archive = get_file(
        name,
        [f"http://www.idiap.ch/software/bob/data/bob/bob.pad.face/{name}"],
        cache_subdir="protocols",
        file_hash="e00ce410",
    )

    # the original videos are located through the bob configuration
    video_loader = VideoPadSample(
        original_directory=rc.get("bob.db.casia_fasd.directory"),
        annotation_directory=annotation_directory,
        selection_style=selection_style,
        max_number_of_frames=max_number_of_frames,
        step_size=step_size,
    )

    pad_db = FileListPadDatabase(
        protocols_archive,
        protocol="grandtest",
        transformer=video_loader,
        **kwargs,
    )
    pad_db.annotation_type = annotation_type
    pad_db.fixed_positions = fixed_positions
    return pad_db
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import os
import numpy as np
import bob.io.video
from bob.pad.face.database import VideoPadFile
from bob.pad.base.database import PadDatabase
from functools import partial
import bob.io.base
from bob.bio.video import VideoLikeContainer
from bob.extension import rc
from bob.extension.download import get_file
from bob.pad.base.database import FileListPadDatabase
from bob.pipelines import DelayedSample
from sklearn.preprocessing import FunctionTransformer
class CasiaSurfPadFile(VideoPadFile):
"""
A high level implementation of the File class for the CASIA-SURF database.
logger = logging.getLogger(__name__)
Note that this does not represent a file per se, but rather a sample
that may contain more than one file.
Attributes
----------
f : :py:class:`object`
An instance of the Sample class defined in the low level db interface
of the CASIA-SURF database, in the bob.db.casiasurf.models.py file.
def load_multi_stream(mods, paths):
retval = {}
for mod, path in zip(mods, paths):
data = bob.io.base.load(path)
fc = VideoLikeContainer(data, [0])
retval[mod] = fc
"""
if len(retval) == 1:
retval = retval[mods[0]]
return retval
def casia_surf_multistream_load(samples, original_directory, stream_type):
mod_to_attr = {}
mod_to_attr["color"] = "filename"
mod_to_attr["infrared"] = "ir_filename"
mod_to_attr["depth"] = "depth_filename"
def __init__(self, s, stream_type):
    """Wraps a low-level CASIA-SURF sample.

    Parameters
    ----------
    s : :py:class:`object`
        An instance of the Sample class defined in the low level db interface
        of the CASIA-SURF database, in the bob.db.casiasurf.models.py file.
    stream_type : str or list of str
        The streams to be loaded.
    """
    self.s = s
    self.stream_type = stream_type

    # the attack type is always handled as a string
    raw_type = s.attack_type
    attack_type = raw_type if isinstance(raw_type, str) else str(raw_type)

    # '0' is turned into None, i.e. no attack type
    if attack_type == '0':
        attack_type = None

    # the sample id serves as client id, file id and path
    super(CasiaSurfPadFile, self).__init__(
        client_id=s.id,
        file_id=s.id,
        attack_type=attack_type,
        path=s.id,
    )
def load(self, directory=rc['bob.db.casiasurf.directory'], extension='.jpg'):
    """Loads the data of this sample through the low-level sample object.

    Parameters
    ----------
    directory : :py:class:`str`
        String containing the path to the CASIA-SURF database
    extension : :py:class:`str`
        Extension of the image files

    Returns
    -------
    dict:
        image data for multiple streams stored in the dictionary.
        The structure of the dictionary:
        ``data={"stream1_name" : numpy array, "stream2_name" : numpy array}``
        Names of the streams are defined in ``self.stream_type``.
    """
    # loading is delegated to the wrapped sample; the stream selection is
    # passed as its ``modality``
    sample = self.s
    return sample.load(directory, extension, modality=self.stream_type)
class CasiaSurfPadDatabase(PadDatabase):
"""High level implementation of the Database class for the 3DMAD database.
Note that at the moment, this database only contains a training and validation set.
The protocol specifies the modality(ies) to load.
Attributes
mods = []
if isinstance(stream_type, str) and stream_type != "all":
mods = [stream_type]
elif isinstance(stream_type, str) and stream_type == "all":
mods = ["color", "infrared", "depth"]
else:
for m in stream_type:
mods.append(m)
def _load(sample):
paths = []
for mod in mods:
paths.append(
os.path.join(original_directory or "", getattr(sample, mod_to_attr[mod]))
)
data = partial(load_multi_stream, mods, paths)
return DelayedSample(data, parent=sample, annotations=None)
return [_load(s) for s in samples]
def CasiaSurfMultiStreamSample(original_directory, stream_type):
    """Wraps ``casia_surf_multistream_load`` in a ``FunctionTransformer``.

    Parameters
    ----------
    original_directory
        Passed to ``casia_surf_multistream_load`` as ``original_directory``.
    stream_type
        Passed to ``casia_surf_multistream_load`` as ``stream_type``.

    Returns
    -------
    FunctionTransformer
        A transformer that calls ``casia_surf_multistream_load`` with the two
        keyword arguments above.
    """
    loader_options = dict(
        original_directory=original_directory,
        stream_type=stream_type,
    )
    return FunctionTransformer(
        casia_surf_multistream_load, kw_args=loader_options
    )
def CasiaSurfPadDatabase(
stream_type="all",
**kwargs,
):
"""The CASIA SURF Face PAD database interface.
Parameters
----------
db : :py:class:`bob.db.casiasurf.Database`
the low-level database interface
low_level_group_names : list of :py:obj:`str`
the group names in the low-level interface (world, dev, test)
high_level_group_names : list of :py:obj:`str`
the group names in the high-level interface (train, dev, eval)
stream_type : str
A str or a list of str of the following choices: ``all``, ``color``, ``depth``, ``infrared``, by default ``all``
The returned sample either have their data as a VideoLikeContainer or
a dict of VideoLikeContainers depending on the chosen stream_type.
"""
def __init__(self, protocol='all', original_directory=rc['bob.db.casiasurf.directory'], original_extension='.jpg', **kwargs):
    """Sets up the low-level database and the group-name mapping.

    Parameters
    ----------
    protocol : :py:class:`str`
        The name of the protocol that defines the default experimental setup
        for this database.
    original_directory : :py:class:`str`
        The directory where the original data of the database are stored.
    original_extension : :py:class:`str`
        The file name extension of the original data.
    **kwargs
        Forwarded to the base class constructor.
    """
    from bob.db.casiasurf import Database as LowLevelDatabase

    # NOTE(review): self.db is created before calling the base constructor
    # because the ``original_directory`` property of this class reads and
    # writes ``self.db`` -- presumably the base constructor assigns it;
    # confirm before reordering.
    self.db = LowLevelDatabase()

    # group names of the low-level interface and their high-level equivalents
    self.low_level_group_names = ('train', 'validation', 'test')
    self.high_level_group_names = ('train', 'dev', 'eval')

    super(CasiaSurfPadDatabase, self).__init__(
        name='casiasurf',
        protocol=protocol,
        original_directory=original_directory,
        original_extension=original_extension,
        **kwargs
    )
@property
def original_directory(self):
    """The ``original_directory`` stored on the low-level database."""
    low_level_db = self.db
    return low_level_db.original_directory

@original_directory.setter
def original_directory(self, value):
    # writes go straight to the low-level database, which holds the value
    low_level_db = self.db
    low_level_db.original_directory = value
def objects(self,
            groups=None,
            protocol='all',
            purposes=None,
            model_ids=None,
            **kwargs):
    """Returns a list of CasiaSurfPadFile objects, which fulfill the given restrictions.

    Parameters
    ----------
    groups : list of :py:class:`str`
        The groups of which the clients should be returned.
        Usually, groups are one or more elements of ('train', 'dev', 'eval').
        ``None`` is passed on to the low-level database unchanged.
    protocol : :py:class:`str`
        The protocol for which the samples should be retrieved. It is used as
        the ``stream_type`` of the returned files.
    purposes : :py:class:`str`
        The purposes for which Sample objects should be retrieved.
        Usually it is either 'real' or 'attack'. ``None`` selects both.
    model_ids
        This parameter is not supported in PAD databases yet.
    **kwargs
        Forwarded to the ``objects`` method of the low-level database.

    Returns
    -------
    samples : list of :py:class:`CasiaSurfPadFile`
        A list of CasiaSurfPadFile objects.
    """
    groups = self.convert_names_to_lowlevel(groups, self.low_level_group_names, self.high_level_group_names)

    # the default purposes=None used to crash on ``'real' in purposes``
    if purposes is None:
        purposes = ('real', 'attack')

    # every low-level group uses the same two purpose names, so each
    # requested purpose is listed once instead of once per group
    requested = [p for p in ('real', 'attack') if p in purposes]

    # groups=None (no restriction) used to leave the purposes undefined
    if groups is None or any(g in groups for g in self.low_level_group_names):
        lowlevel_purposes = requested
    else:
        lowlevel_purposes = []

    samples = self.db.objects(groups=groups, purposes=lowlevel_purposes, **kwargs)
    return [CasiaSurfPadFile(s, stream_type=protocol) for s in samples]
def annotations(self, file):
    """Returns ``None``: no annotations are provided with this DB.

    Parameters
    ----------
    file
        Ignored.
    """
    return None
name = "pad-face-casia-surf-252f86f2.tar.gz"
dataset_protocols_path = get_file(
name,
[f"http://www.idiap.ch/software/bob/data/bob/bob.pad.face/{name}"],
cache_subdir="protocols",
file_hash="252f86f2",
)
transformer = CasiaSurfMultiStreamSample(
original_directory=rc.get("bob.db.casiasurf.directory"),
stream_type=stream_type,
)
database = FileListPadDatabase(
dataset_protocols_path,
protocol="all",
transformer=transformer,
**kwargs,
)
database.annotation_type = None
database.fixed_positions = None
return database
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#!/usr/bin/env python
#==============================================================================
import bob.bio.video # Used in CELEBAPadFile class
import bob.io.base
import numpy as np
from bob.pad.base.database import PadFile # Used in ReplayPadFile class
from bob.pad.base.database import FileListPadDatabase