Commit 9a3f8488 authored by Amir MOHAMMADI

WIP: port databases to the new csv format

parent fb28b9f4
Pipeline #48602 failed with stage
in 10 minutes and 58 seconds
......@@ -13,7 +13,10 @@ else:
# Preprocessor #
cropper = face_crop_solver(
cropped_image_size=64, cropped_positions=annotation_type, color_channel="gray"
cropped_image_size=64,
cropped_positions=annotation_type,
color_channel="gray",
fixed_positions=fixed_positions,
)
preprocessor = VideoWrapper(cropper)
preprocessor = mario.wrap(
......
......@@ -13,6 +13,5 @@ the link.
.. include:: links.rst
"""
from bob.pad.face.database import ReplayMobilePadDatabase
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
database = DatabaseConnector(ReplayMobilePadDatabase())
database = ReplayMobilePadDatabase()
from .database import VideoPadFile
from .database import VideoPadFile, VideoPadSample
from .replay import ReplayPadDatabase
from .replay_mobile import ReplayMobilePadDatabase
from .mifs import MIFSPadDatabase
......
from functools import partial
import os
from bob.pad.base.database import PadFile
import bob.bio.video
import bob.io.video
from bob.db.base.annotations import read_annotation_file
from sklearn.preprocessing import FunctionTransformer
from bob.bio.video import VideoAsArray
from bob.pipelines import DelayedSample
def delayed_video_load(
    samples,
    original_directory,
    annotation_directory=None,
    selection_style=None,
    max_number_of_frames=None,
    step_size=None,
    get_transform=None,
):
    """Wrap every sample in a ``DelayedSample`` that loads its video lazily.

    Parameters
    ----------
    samples : iterable
        Objects exposing a ``filename`` attribute.
    original_directory : str or None
        Prefix joined in front of ``sample.filename``; a falsy value is
        treated as the empty string.
    annotation_directory : str or None
        When truthy, each returned sample gets a delayed ``annotations``
        attribute that calls ``read_annotation_file`` on
        ``"{annotation_directory}:{stem}.json"`` with ``annotation_type="json"``,
        where ``stem`` is ``sample.filename`` without its extension.
    selection_style, max_number_of_frames, step_size
        Forwarded unchanged to ``VideoAsArray``.
    get_transform : callable or None
        Called once per sample; its return value is handed to ``VideoAsArray``
        as ``transform``. Defaults to a function that always returns ``None``.

    Returns
    -------
    list
        One ``DelayedSample`` per input sample, in input order.
    """
    if get_transform is None:

        def get_transform(x):
            return None

    video_root = original_directory or ""
    annotation_root = annotation_directory or ""

    def to_delayed(sample):
        # Nothing is read here: both the video and the annotations are
        # wrapped in partials that are only called later.
        loader = partial(
            VideoAsArray,
            path=os.path.join(video_root, sample.filename),
            selection_style=selection_style,
            max_number_of_frames=max_number_of_frames,
            step_size=step_size,
            transform=get_transform(sample),
        )

        lazy_attributes = None
        if annotation_root:
            stem = os.path.splitext(sample.filename)[0]
            # presumably "<container>:<member>" notation understood by
            # read_annotation_file — TODO confirm
            lazy_attributes = {
                "annotations": partial(
                    read_annotation_file,
                    file_name=f"{annotation_root}:{stem}.json",
                    annotation_type="json",
                )
            }

        # NOTE(review): ``annotations=None`` is passed even when a delayed
        # "annotations" attribute is supplied — confirm that DelayedSample
        # gives the delayed attribute precedence.
        return DelayedSample(
            loader,
            parent=sample,
            delayed_attributes=lazy_attributes,
            annotations=None,
        )

    return [to_delayed(sample) for sample in samples]
def VideoPadSample(
    original_directory,
    annotation_directory=None,
    selection_style=None,
    max_number_of_frames=None,
    step_size=None,
    get_transform=None,
):
    """Build a ``FunctionTransformer`` that applies ``delayed_video_load``.

    All arguments are stored as keyword arguments and passed to
    ``delayed_video_load`` each time the transformer is applied; input
    validation is disabled (``validate=False``).

    Returns
    -------
    sklearn.preprocessing.FunctionTransformer
        The configured transformer.
    """
    loader_options = {
        "original_directory": original_directory,
        "annotation_directory": annotation_directory,
        "selection_style": selection_style,
        "max_number_of_frames": max_number_of_frames,
        "step_size": step_size,
        "get_transform": get_transform,
    }
    return FunctionTransformer(
        delayed_video_load,
        validate=False,
        kw_args=loader_options,
    )
class VideoPadFile(PadFile):
......
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import logging
import os
# Used in ReplayMobilePadFile class
from bob.pad.base.database import PadDatabase
from bob.pad.face.database import VideoPadFile
from bob.pad.face.utils import number_of_frames
from bob.db.base.annotations import read_annotation_file
import numpy as np
from bob.extension import rc
REPLAYMOBILE_FRAME_SHAPE = (3, 1280, 720)
class ReplayMobilePadFile(VideoPadFile):
    """
    High level file object for the Replay-Mobile database.

    Wraps a low-level File record (kept as ``self.f``) and exposes it through
    the ``VideoPadFile`` interface.
    """

    def __init__(self, f):
        """
        Parameters
        ----------
        f : object
            An instance of the File class defined in the low level db
            interface of the Replay-Mobile database
            (``bob.db.replaymobile.models``).
        """
        self.f = f

        # PadFile needs client_id, path, attack_type and file_id. According to
        # the PadFile documentation, in case of spoofed data attack_type names
        # the kind of attack, while None marks a genuine (real) sample. For
        # this database every attack is simply labelled 'attack'.
        attack_type = None if f.is_real() else 'attack'

        super(ReplayMobilePadFile, self).__init__(
            client_id=f.client_id,
            path=f.path,
            attack_type=attack_type,
            file_id=f.id,
        )

    def load(self):
        """
        Load the video through the low-level file object.

        Returns
        -------
        video_data
            Whatever ``self.f.load`` returns for ``self.original_directory``
            and ``self.original_extension``.
        """
        # TODO(amir): Handle loading with VideoAsArray and with a transform as
        # some video files need to be flipped.
        return self.f.load(
            directory=self.original_directory,
            extension=self.original_extension,
        )

    @property
    def annotations(self):
        """
        Annotations of this file.

        External annotations are read from ``self.annotation_directory`` when
        it is not None; otherwise the annotations provided by the low-level
        package are returned.
        """
        # Imported up front (even if the external branch is taken) so that the
        # import behaviour does not depend on the configuration.
        from bob.db.replaymobile.models import replaymobile_annotations

        if self.annotation_directory is None:
            # original annotations
            return replaymobile_annotations(self.f, self.original_directory)

        # external annotations
        annotation_path = self.make_path(
            self.annotation_directory, self.annotation_extension
        )
        return read_annotation_file(annotation_path, self.annotation_type)

    @property
    def frames(self):
        """Frames of this video as returned by ``replaymobile_frames``."""
        from bob.db.replaymobile.models import replaymobile_frames

        return replaymobile_frames(self.f, self.original_directory)

    @property
    def number_of_frames(self):
        """Number of frames of the ``.mov`` file in ``original_directory``."""
        video_path = self.make_path(
            directory=self.original_directory, extension='.mov'
        )
        # Resolves to the module-level helper, not to this property.
        return number_of_frames(video_path)

    @property
    def frame_shape(self):
        """The module-level constant ``REPLAYMOBILE_FRAME_SHAPE``."""
        return REPLAYMOBILE_FRAME_SHAPE
class ReplayMobilePadDatabase(PadDatabase):
"""
A high level implementation of the Database class for the Replay-Mobile
database.
"""
def __init__(
self,
protocol='grandtest',
original_directory=rc['bob.db.replaymobile.directory'],
original_extension='.mov',
annotation_directory=None,
annotation_extension='.json',
annotation_type='json',
**kwargs):
"""
Parameters
----------
protocol : str or None
The name of the protocol that defines the default experimental
setup for this database. ``grandtest`` is the default protocol for
this database.
original_directory : str
The directory where the original data of the database are stored.
original_extension : str
The file name extension of the original data.
annotation_directory : str
If provided, the annotations will be read from this directory
instead of the default annotations that are provided.
annotation_extension : str
The extension of annotations when annotation_directory is provided.
**kwargs
The arguments of the :py:class:`bob.pad.base.database.PadDatabase`
base class constructor.
"""
from bob.db.replaymobile import Database as LowLevelDatabase
self.db = LowLevelDatabase()
# Since the high level API expects different group names than what the
# low level API offers, you need to convert them when necessary
self.low_level_group_names = (
'train', 'devel',
'test') # group names in the low-level database interface
self.high_level_group_names = (
'train', 'dev',
'eval') # names are expected to be like that in objects() function
# Always use super to call parent class methods.
super(ReplayMobilePadDatabase, self).__init__(
name='replay-mobile',
protocol=protocol,
original_directory=original_directory,
original_extension=original_extension,
from bob.pad.base.database import FileListPadDatabase
from bob.pad.face.database import VideoPadSample
from bob.pipelines.transformers import Str_To_Types, str_to_bool
from sklearn.pipeline import make_pipeline
from bob.extension.download import get_file
logger = logging.getLogger(__name__)
def get_rm_video_transform(sample):
    """Return the array transform to apply to the video of ``sample``.

    The returned callable converts its input with ``np.asarray``, swaps the
    last two axes and, when ``sample.should_flip`` is truthy, additionally
    reverses the second-to-last axis of the swapped array.

    ``sample.should_flip`` is read once, when this function is called, not
    each time the transform runs.
    """
    flip_needed = sample.should_flip

    def transform(video):
        # rollaxis(-1, -2) moves the last axis in front of the one before it
        swapped = np.rollaxis(np.asarray(video), -1, -2)
        return swapped[..., ::-1, :] if flip_needed else swapped

    return transform
def ReplayMobilePadDatabase(
protocol="grandtest",
selection_style=None,
max_number_of_frames=None,
step_size=None,
annotation_directory=None,
annotation_type=None,
fixed_positions=None,
**kwargs,
):
dataset_protocols_path = os.path.expanduser(
"~/temp/bob_data/datasets/pad-face-replay-mobile.tar.gz"
)
if annotation_directory is None:
annotation_directory = get_file(
"replaymobile-mtcnn-annotations.tar.xz",
[
"http://www.idiap.ch/software/bob/data/bob/bob.db.replaymobile/master/replaymobile-mtcnn-annotations.tar.xz"
],
)
annotation_type = "eyes-center"
transformer = make_pipeline(
Str_To_Types(fieldtypes=dict(should_flip=str_to_bool)),
VideoPadSample(
original_directory=rc.get("bob.db.replaymobile.directory"),
annotation_directory=annotation_directory,
annotation_extension=annotation_extension,
annotation_type=annotation_type,
**kwargs)
@property
def original_directory(self):
    """Directory of the original data, stored on the low-level ``self.db``."""
    return self.db.original_directory

@original_directory.setter
def original_directory(self, value):
    # Write through to the low-level database so both stay in sync.
    self.db.original_directory = value
def objects(self,
            groups=None,
            protocol=None,
            purposes=None,
            model_ids=None,
            **kwargs):
    """
    Return the ReplayMobilePadFile objects that fulfill the given
    restrictions.

    Parameters
    ----------
    groups : str
        OR a list of strings.
        High-level group names, usually one or more of
        ('train', 'dev', 'eval'); they are converted to the low-level names
        before querying.

    protocol : str
        The protocol for which the files should be retrieved.

    purposes : str
        OR a list of strings.
        Usually 'real' or 'attack'; passed to the low-level query as ``cls``.

    model_ids
        Accepted for interface compatibility; not used by this method.

    **kwargs
        Forwarded to the low-level ``objects`` query.

    Returns
    -------
    files : [ReplayMobilePadFile]
        A list of ReplayMobilePadFile objects, each configured with this
        database's original and annotation settings.
    """
    low_level_groups = self.convert_names_to_lowlevel(
        groups, self.low_level_group_names, self.high_level_group_names)

    # The low-level database was designed for PAD experiments, so its query
    # result only needs wrapping.
    low_level_files = self.db.objects(
        protocol=protocol, groups=low_level_groups, cls=purposes, **kwargs)

    pad_files = []
    for low_level_file in low_level_files:
        pad_file = ReplayMobilePadFile(low_level_file)
        pad_file.original_directory = self.original_directory
        pad_file.annotation_directory = self.annotation_directory
        pad_file.annotation_extension = self.annotation_extension
        pad_file.annotation_type = self.annotation_type
        pad_files.append(pad_file)
    return pad_files
def annotations(self, f):
    """
    Return the annotations of the file object ``f``.

    This simply delegates to ``f.annotations``.

    Parameters
    ----------
    f : :any:`ReplayMobilePadFile`
        A high-level Replay-Mobile file object.

    Returns
    -------
    annotations : :py:class:`dict`
        The value of ``f.annotations``. Presumably a per-frame dictionary of
        the form ``{'1': frame1_dict, '2': frame2_dict, ...}`` where
        ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}``
        is the face bounding box in frame N — confirm against the annotation
        source in use.
    """
    file_annotations = f.annotations
    return file_annotations
def frames(self, padfile):
    """Return the frames of ``padfile``.

    Delegates to ``padfile.frames``.

    Parameters
    ----------
    padfile : :any:`ReplayMobilePadFile`
        The high-level replay pad file

    Returns
    -------
    object
        The value of ``padfile.frames``; presumably an iterable of
        :any:`numpy.array` frames of size (3, 1280, 720) — TODO confirm.
    """
    video_frames = padfile.frames
    return video_frames
def number_of_frames(self, padfile):
    """Return the number of frames in a video file.

    Delegates to ``padfile.number_of_frames``.

    Parameters
    ----------
    padfile : :any:`ReplayMobilePadFile`
        The high-level pad file

    Returns
    -------
    int
        The number of frames.
    """
    frame_count = padfile.number_of_frames
    return frame_count
@property
def frame_shape(self):
    """Return the size of each frame in this database.

    Returns
    -------
    (int, int, int)
        The module-level ``REPLAYMOBILE_FRAME_SHAPE`` constant, documented as
        (#Channels, Height, Width).
    """
    shape = REPLAYMOBILE_FRAME_SHAPE
    return shape
selection_style=selection_style,
max_number_of_frames=max_number_of_frames,
step_size=step_size,
get_transform=get_rm_video_transform,
),
)
database = FileListPadDatabase(
dataset_protocols_path,
protocol,
transformer=transformer,
**kwargs,
)
database.annotation_type = annotation_type
database.fixed_positions = fixed_positions
return database
......@@ -8,169 +8,206 @@ import bob.bio.base
from bob.bio.base.test.utils import db_available
@db_available('replay') # the name of the package
@db_available("replay") # the name of the package
def test_replay():
# replay-attack is the name of the entry point
replay_database_instance = bob.bio.base.load_resource(
'replay-attack',
'database',
preferred_package='bob.pad.face',
package_prefix='bob.pad.'
"replay-attack",
"database",
preferred_package="bob.pad.face",
package_prefix="bob.pad.",
).database
try:
assert len(
replay_database_instance.objects(
groups=['train', 'dev', 'eval'])) == 1200
assert len(
replay_database_instance.objects(groups=['train', 'dev'])) == 720
assert len(replay_database_instance.objects(groups=['train'])) == 360
assert len(
replay_database_instance.objects(
groups=['train', 'dev', 'eval'], protocol='grandtest')) == 1200
assert len(
replay_database_instance.objects(
groups=['train', 'dev', 'eval'],
protocol='grandtest',
purposes='real')) == 200
assert len(
replay_database_instance.objects(
groups=['train', 'dev', 'eval'],
protocol='grandtest',
purposes='attack')) == 1000
assert (
len(replay_database_instance.objects(groups=["train", "dev", "eval"]))
== 1200
)
assert len(replay_database_instance.objects(groups=["train", "dev"])) == 720
assert len(replay_database_instance.objects(groups=["train"])) == 360
assert (
len(
replay_database_instance.objects(
groups=["train", "dev", "eval"], protocol="grandtest"
)
)
== 1200
)
assert (
len(
replay_database_instance.objects(
groups=["train", "dev", "eval"],
protocol="grandtest",
purposes="real",
)
)
== 200
)
assert (
len(
replay_database_instance.objects(
groups=["train", "dev", "eval"],
protocol="grandtest",
purposes="attack",
)
)
== 1000
)
except IOError as e:
raise SkipTest(
"The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'"
% e)
% e
)
@db_available('replaymobile')
def test_replaymobile():
replaymobile = bob.bio.base.load_resource(
'replay-mobile',
'database',
preferred_package='bob.pad.face',
package_prefix='bob.pad.').database
database = bob.bio.base.load_resource(
"replay-mobile",
"database",
preferred_package="bob.pad.face",
package_prefix="bob.pad.",
)
assert database.protocols() == ["grandtest", "mattescreen", "print"]
assert database.groups() == ["dev", "eval", "train"]
assert len(database.samples(groups=["train", "dev", "eval"])) == 1030
assert len(database.samples(groups=["train", "dev"])) == 728
assert len(database.samples(groups=["train"])) == 312
assert len(database.samples(groups=["train", "dev", "eval"])) == 1030
assert (
len(database.samples(groups=["train", "dev", "eval"], purposes="real")) == 390
)
assert (
len(database.samples(groups=["train", "dev", "eval"], purposes="attack")) == 640
)
sample = database.sort(database.samples())[0]
try:
assert len(
replaymobile.objects(groups=['train', 'dev', 'eval'])) == 1030
assert len(replaymobile.objects(groups=['train', 'dev'])) == 728
assert len(replaymobile.objects(groups=['train'])) == 312
assert len(
replaymobile.objects(
groups=['train', 'dev', 'eval'], protocol='grandtest')) == 1030
assert len(
replaymobile.objects(
groups=['train', 'dev', 'eval'],
protocol='grandtest',
purposes='real')) == 390
assert len(
replaymobile.objects(
groups=['train', 'dev', 'eval'],
protocol='grandtest',
purposes='attack')) == 640
assert sample.annotations["0"] == {
"bottomright": [760, 498],
"topleft": [374, 209],
"leye": [518, 417],
"reye": [522, 291],
"mouthleft": [669, 308],
"mouthright": [666, 407],
"nose": [585, 358],
}
assert sample.data.shape == (20, 3, 720, 1280)
except IOError as e:
raise SkipTest(
"The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'"
% e)
raise SkipTest(e)
# Test the maskattack database
@db_available('maskattack')
@db_available("maskattack")
def test_maskattack():
maskattack = bob.bio.base.load_resource(
'maskattack',
'database',
preferred_package='bob.pad.face',
package_prefix='bob.pad.').database
"maskattack",
"database",
preferred_package="bob.pad.face",
package_prefix="bob.pad.",
).database
try:
# all real sequences: 2 sessions, 5 recordings for 17 individuals
assert len(maskattack.objects(
groups=['train', 'dev', 'eval'], purposes='real')) == 170
assert (
len(maskattack.objects(groups=["train", "dev", "eval"], purposes="real"))
== 170
)
# all attacks: 1 session, 5 recordings for 17 individuals
assert len(maskattack.objects(
groups=['train', 'dev', 'eval'], purposes='attack')) == 85
assert (
len(maskattack.objects(groups=["train", "dev", "eval"], purposes="attack"))
== 85
)
# training real: 7 subjects, 2 sessions, 5 recordings
assert len(maskattack.objects(groups=['train'], purposes='real')) == 70
assert len(maskattack.objects(groups=["train"], purposes="real")) == 70
# training real: 7 subjects, 1 session, 5 recordings
assert len(maskattack.objects(
groups=['train'], purposes='attack')) == 35
assert len(maskattack.objects(groups=["train"], purposes="attack")) == 35
# dev and test contains the same number of sequences:
# real: 5 subjects, 2 sessions, 5 recordings
# attack: 5 subjects, 1 sessions, 5 recordings
assert len(maskattack.objects(groups=['dev'], purposes='real')) == 50
assert len(maskattack.objects(groups=['eval'], purposes='real')) == 50
assert len(maskattack.objects(groups=['dev'], purposes='attack')) == 25
assert len(maskattack.objects(
groups=['eval'], purposes='attack')) == 25
assert len(maskattack.objects(groups=["dev"], purposes="real")) == 50
assert len(maskattack.objects(groups=["eval"], purposes="real")) == 50
assert len(maskattack.objects(groups=["dev"], purposes="attack")) == 25
assert len(maskattack.objects(groups=["eval"], purposes="attack")) == 25
except IOError as e:
raise SkipTest(
"The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'"
% e)
% e
)
# Test the casiasurf database
@db_available('casiasurf')
@db_available("casiasurf")
def test_casiasurf():
casiasurf = bob.bio.base.load_resource(
'casiasurf',
'database',
preferred_package='bob.pad.face',
package_prefix='bob.pad.').database
"casiasurf",
"database",
preferred_package="bob.pad.face",
package_prefix="bob.pad.",
).database
try:
assert len(casiasurf.objects(groups=['train'], purposes='real')) == 8942
assert len(casiasurf.objects(groups=['train'], purposes='attack')) == 20324
assert len(casiasurf.objects(groups=('dev',), purposes=('real',))) == 2994
assert len(casiasurf.objects(groups=('dev',), purposes=('attack',))) == 6614
assert len(casiasurf.objects(groups=('dev',), purposes=('real','attack'))) == 9608
assert len(casiasurf.objects(groups=('eval',), purposes=('real',))) == 17458
assert len(casiasurf.objects(groups=('eval',), purposes=('attack',))) == 40252
assert len(casiasurf.objects(groups=('eval',), purposes=('real','attack'))) == 57710
assert len(casiasurf.objects(groups=["train"], purposes="real")) == 8942
assert len(casiasurf.objects(groups=["train"], purposes="attack")) == 20324
assert len(casiasurf.objects(groups=("dev",), purposes=("real",))) == 2994
assert len(casiasurf.objects(groups=("dev",), purposes=("attack",))) == 6614
assert (