Commit 9a3f8488 authored by Amir MOHAMMADI's avatar Amir MOHAMMADI
Browse files

WIP: port databases to the new csv format

parent fb28b9f4
Pipeline #48602 failed with stage
in 10 minutes and 58 seconds
...@@ -13,7 +13,10 @@ else: ...@@ -13,7 +13,10 @@ else:
# Preprocessor # # Preprocessor #
cropper = face_crop_solver( cropper = face_crop_solver(
cropped_image_size=64, cropped_positions=annotation_type, color_channel="gray" cropped_image_size=64,
cropped_positions=annotation_type,
color_channel="gray",
fixed_positions=fixed_positions,
) )
preprocessor = VideoWrapper(cropper) preprocessor = VideoWrapper(cropper)
preprocessor = mario.wrap( preprocessor = mario.wrap(
......
...@@ -13,6 +13,5 @@ the link. ...@@ -13,6 +13,5 @@ the link.
.. include:: links.rst .. include:: links.rst
""" """
from bob.pad.face.database import ReplayMobilePadDatabase from bob.pad.face.database import ReplayMobilePadDatabase
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
database = DatabaseConnector(ReplayMobilePadDatabase()) database = ReplayMobilePadDatabase()
from .database import VideoPadFile from .database import VideoPadFile, VideoPadSample
from .replay import ReplayPadDatabase from .replay import ReplayPadDatabase
from .replay_mobile import ReplayMobilePadDatabase from .replay_mobile import ReplayMobilePadDatabase
from .mifs import MIFSPadDatabase from .mifs import MIFSPadDatabase
......
from functools import partial
import os
from bob.pad.base.database import PadFile from bob.pad.base.database import PadFile
import bob.bio.video import bob.bio.video
import bob.io.video import bob.io.video
from bob.db.base.annotations import read_annotation_file from bob.db.base.annotations import read_annotation_file
from sklearn.preprocessing import FunctionTransformer
from bob.bio.video import VideoAsArray
from bob.pipelines import DelayedSample
def delayed_video_load(
    samples,
    original_directory,
    annotation_directory=None,
    selection_style=None,
    max_number_of_frames=None,
    step_size=None,
    get_transform=None,
):
    """Wrap every sample in a ``DelayedSample`` whose video is loaded lazily.

    Nothing is read from disk here: the video (and, optionally, its
    annotations) are bound into ``functools.partial`` objects that are only
    called when the corresponding attribute of the returned sample is used.

    Parameters
    ----------
    samples : iterable
        Input samples; each one must expose a ``filename`` attribute.
    original_directory : str or None
        Directory that ``sample.filename`` is joined onto. A falsy value is
        treated as ``""``.
    annotation_directory : str or None
        When truthy, a delayed ``annotations`` attribute is attached that reads
        ``"<annotation_directory>:<filename without extension>.json"`` through
        ``read_annotation_file``. A falsy value disables this.
    selection_style, max_number_of_frames, step_size
        Forwarded unchanged to ``VideoAsArray``.
    get_transform : callable or None
        Called once per sample; its return value is forwarded to
        ``VideoAsArray`` as ``transform``. When ``None``, ``transform=None`` is
        used for every sample.

    Returns
    -------
    list
        One ``DelayedSample`` per input sample, in input order.
    """
    video_root = original_directory or ""
    annotation_root = annotation_directory or ""

    def _to_delayed(sample):
        # Resolve the video location first, then ask for the per-sample
        # transform (same evaluation order as a plain loop body).
        video_path = os.path.join(video_root, sample.filename)
        frame_transform = None if get_transform is None else get_transform(sample)
        load_video = partial(
            VideoAsArray,
            path=video_path,
            selection_style=selection_style,
            max_number_of_frames=max_number_of_frames,
            step_size=step_size,
            transform=frame_transform,
        )

        lazy_attributes = None
        if annotation_root:
            stem = os.path.splitext(sample.filename)[0]
            # NOTE(review): the "dir:member" form presumably addresses a file
            # inside an archive -- confirm against read_annotation_file.
            lazy_attributes = {
                "annotations": partial(
                    read_annotation_file,
                    file_name=f"{annotation_root}:{stem}.json",
                    annotation_type="json",
                )
            }

        # ``annotations=None`` is always passed so the attribute exists even
        # when no annotation directory was given.
        return DelayedSample(
            load_video,
            parent=sample,
            delayed_attributes=lazy_attributes,
            annotations=None,
        )

    return [_to_delayed(sample) for sample in samples]
def VideoPadSample(
    original_directory,
    annotation_directory=None,
    selection_style=None,
    max_number_of_frames=None,
    step_size=None,
    get_transform=None,
):
    """Return a ``FunctionTransformer`` that applies ``delayed_video_load``.

    All arguments are stored as keyword arguments of the transformer and are
    handed to ``delayed_video_load`` together with the samples it receives.
    Input validation is switched off (``validate=False``).

    Parameters
    ----------
    original_directory : str or None
        Root directory of the video files.
    annotation_directory : str or None
        Location of the annotations, if any.
    selection_style, max_number_of_frames, step_size
        Frame-selection options forwarded to ``delayed_video_load``.
    get_transform : callable or None
        Per-sample transform factory forwarded to ``delayed_video_load``.

    Returns
    -------
    sklearn.preprocessing.FunctionTransformer
        Transformer wrapping ``delayed_video_load``.
    """
    loader_options = {
        "original_directory": original_directory,
        "annotation_directory": annotation_directory,
        "selection_style": selection_style,
        "max_number_of_frames": max_number_of_frames,
        "step_size": step_size,
        "get_transform": get_transform,
    }
    return FunctionTransformer(
        delayed_video_load, validate=False, kw_args=loader_options
    )
class VideoPadFile(PadFile): class VideoPadFile(PadFile):
......
#!/usr/bin/env python2 import logging
# -*- coding: utf-8 -*- import os
# Used in ReplayMobilePadFile class import numpy as np
from bob.pad.base.database import PadDatabase
from bob.pad.face.database import VideoPadFile
from bob.pad.face.utils import number_of_frames
from bob.db.base.annotations import read_annotation_file
from bob.extension import rc from bob.extension import rc
from bob.pad.base.database import FileListPadDatabase
from bob.pad.face.database import VideoPadSample
from bob.pipelines.transformers import Str_To_Types, str_to_bool
from sklearn.pipeline import make_pipeline
from bob.extension.download import get_file
REPLAYMOBILE_FRAME_SHAPE = (3, 1280, 720) logger = logging.getLogger(__name__)
class ReplayMobilePadFile(VideoPadFile): def get_rm_video_transform(sample):
""" should_flip = sample.should_flip
A high level implementation of the File class for the Replay-Mobile
database.
"""
def __init__(self, f): def transform(video):
""" video = np.asarray(video)
Parameters video = np.rollaxis(video, -1, -2)
---------- if should_flip:
f : object video = video[..., ::-1, :]
An instance of the File class defined in the low level db interface return video
of the Replay-Mobile database, in the bob.db.replaymobile.models.py
file.
"""
self.f = f return transform
# this f is actually an instance of the File class that is defined in
# bob.db.replaymobile.models and the PadFile class here needs
# client_id, path, attack_type, file_id for initialization. We have to
# convert information here and provide them to PadFile. attack_type is
# a little tricky to get here. Based on the documentation of PadFile:
# In cased of a spoofed data, this parameter should indicate what kind
# of spoofed attack it is. The default None value is interpreted that
# the PadFile is a genuine or real sample.
if f.is_real():
attack_type = None
else:
attack_type = 'attack'
# attack_type is a string and I decided to make it like this for this
# particular database. You can do whatever you want for your own
# database.
super(ReplayMobilePadFile, self).__init__(
client_id=f.client_id,
path=f.path,
attack_type=attack_type,
file_id=f.id)
def load(self): def ReplayMobilePadDatabase(
""" protocol="grandtest",
Overridden version of the load method defined in the ``VideoPadFile``. selection_style=None,
max_number_of_frames=None,
Returns step_size=None,
-------
video_data : :any:`bob.bio.video.VideoAsArray`
Video data.
"""
# TODO(amir): Handle loading with VideoAsArray and with a transform as
# some video files need to be flipped.
video_data_array = self.f.load(
directory=self.original_directory, extension=self.original_extension)
return video_data_array
@property
def annotations(self):
from bob.db.replaymobile.models import replaymobile_annotations
if self.annotation_directory is not None:
# return the external annotations
annotations = read_annotation_file(
self.make_path(self.annotation_directory,
self.annotation_extension),
self.annotation_type)
return annotations
# return original annotations
return replaymobile_annotations(self.f, self.original_directory)
@property
def frames(self):
from bob.db.replaymobile.models import replaymobile_frames
return replaymobile_frames(self.f, self.original_directory)
@property
def number_of_frames(self):
vfilename = self.make_path(
directory=self.original_directory,
extension='.mov')
return number_of_frames(vfilename)
@property
def frame_shape(self):
return REPLAYMOBILE_FRAME_SHAPE
class ReplayMobilePadDatabase(PadDatabase):
"""
A high level implementation of the Database class for the Replay-Mobile
database.
"""
def __init__(
self,
protocol='grandtest',
original_directory=rc['bob.db.replaymobile.directory'],
original_extension='.mov',
annotation_directory=None, annotation_directory=None,
annotation_extension='.json', annotation_type=None,
annotation_type='json', fixed_positions=None,
**kwargs): **kwargs,
""" ):
Parameters dataset_protocols_path = os.path.expanduser(
---------- "~/temp/bob_data/datasets/pad-face-replay-mobile.tar.gz"
protocol : str or None )
The name of the protocol that defines the default experimental if annotation_directory is None:
setup for this database. ``grandtest`` is the default protocol for annotation_directory = get_file(
this database. "replaymobile-mtcnn-annotations.tar.xz",
[
original_directory : str "http://www.idiap.ch/software/bob/data/bob/bob.db.replaymobile/master/replaymobile-mtcnn-annotations.tar.xz"
The directory where the original data of the database are stored. ],
)
original_extension : str annotation_type = "eyes-center"
The file name extension of the original data. transformer = make_pipeline(
Str_To_Types(fieldtypes=dict(should_flip=str_to_bool)),
annotation_directory : str VideoPadSample(
If provided, the annotations will be read from this directory original_directory=rc.get("bob.db.replaymobile.directory"),
instead of the default annotations that are provided.
annotation_extension : str
The extension of annotations when annotation_extension is provided.
**kwargs
The arguments of the :py:class:`bob.bio.base.database.BioDatabase`
base class constructor.
"""
from bob.db.replaymobile import Database as LowLevelDatabase
self.db = LowLevelDatabase()
# Since the high level API expects different group names than what the
# low level API offers, you need to convert them when necessary
self.low_level_group_names = (
'train', 'devel',
'test') # group names in the low-level database interface
self.high_level_group_names = (
'train', 'dev',
'eval') # names are expected to be like that in objects() function
# Always use super to call parent class methods.
super(ReplayMobilePadDatabase, self).__init__(
name='replay-mobile',
protocol=protocol,
original_directory=original_directory,
original_extension=original_extension,
annotation_directory=annotation_directory, annotation_directory=annotation_directory,
annotation_extension=annotation_extension, selection_style=selection_style,
annotation_type=annotation_type, max_number_of_frames=max_number_of_frames,
**kwargs) step_size=step_size,
get_transform=get_rm_video_transform,
@property ),
def original_directory(self): )
return self.db.original_directory database = FileListPadDatabase(
dataset_protocols_path,
@original_directory.setter protocol,
def original_directory(self, value): transformer=transformer,
self.db.original_directory = value **kwargs,
)
def objects(self, database.annotation_type = annotation_type
groups=None, database.fixed_positions = fixed_positions
protocol=None, return database
purposes=None,
model_ids=None,
**kwargs):
"""
This function returns lists of ReplayMobilePadFile objects, which
fulfill the given restrictions.
Parameters
----------
groups : str
OR a list of strings.
The groups of which the clients should be returned.
Usually, groups are one or more elements of
('train', 'dev', 'eval')
protocol : str
The protocol for which the clients should be retrieved.
The protocol is dependent on your database.
If you do not have protocols defined, just ignore this field.
purposes : str
OR a list of strings.
The purposes for which File objects should be retrieved.
Usually it is either 'real' or 'attack'.
model_ids
This parameter is not supported in PAD databases yet
**kwargs
Returns
-------
files : [ReplayMobilePadFile]
A list of ReplayMobilePadFile objects.
"""
# Convert group names to low-level group names here.
groups = self.convert_names_to_lowlevel(
groups, self.low_level_group_names, self.high_level_group_names)
# Since this database was designed for PAD experiments, nothing special
# needs to be done here.
files = self.db.objects(
protocol=protocol, groups=groups, cls=purposes, **kwargs)
files = [ReplayMobilePadFile(f) for f in files]
for f in files:
f.original_directory = self.original_directory
f.annotation_directory = self.annotation_directory
f.annotation_extension = self.annotation_extension
f.annotation_type = self.annotation_type
return files
def annotations(self, f):
    """
    Return annotations for a given file object ``f``, which is an instance
    of ``ReplayMobilePadFile`` defined in the HLDI of the Replay-Mobile DB.

    The ``load()`` method of ``ReplayMobilePadFile`` class (see above)
    returns a video, therefore this method returns bounding-box annotations
    for each video frame. The annotations are returned as dictionary of
    dictionaries.

    If ``self.annotation_directory`` is not None, it will read the
    annotations from there.

    Parameters
    ----------
    f : :any:`ReplayMobilePadFile`
        An instance of :any:`ReplayMobilePadFile` defined above.

    Returns
    -------
    annotations : :py:class:`dict`
        A dictionary containing the annotations for each frame in the
        video. Dictionary structure:
        ``annotations = {'1': frame1_dict, '2': frame2_dict, ...}``. Where
        ``frameN_dict = {'topleft': (row, col),'bottomright': (row, col)}``
        is the dictionary defining the coordinates of the face bounding box
        in frame N.
    """
    # Pure delegation: the file object computes its own annotations.
    return f.annotations
def frames(self, padfile):
    """Returns the frames of the padfile (delegates to ``padfile.frames``).

    Parameters
    ----------
    padfile : :any:`ReplayMobilePadFile`
        The high-level replay pad file

    Returns
    -------
    object
        Whatever ``padfile.frames`` returns; presumably an iterable yielding
        one :any:`numpy.array` of size (3, 1280, 720) per frame -- TODO
        confirm against ``replaymobile_frames``.
    """
    return padfile.frames
def number_of_frames(self, padfile):
    """Returns the number of frames in a video file.

    Parameters
    ----------
    padfile : :any:`ReplayMobilePadFile`
        The high-level pad file

    Returns
    -------
    int
        The number of frames.
    """
    # Pure delegation to the file object's own ``number_of_frames`` property.
    return padfile.number_of_frames
@property
def frame_shape(self):
    """Returns the size of each frame in this database.

    Returns
    -------
    (int, int, int)
        The (#Channels, Height, Width) which is (3, 1280, 720).
    """
    # Module-level constant shared with ``ReplayMobilePadFile.frame_shape``.
    return REPLAYMOBILE_FRAME_SHAPE
...@@ -8,169 +8,206 @@ import bob.bio.base ...@@ -8,169 +8,206 @@ import bob.bio.base
from bob.bio.base.test.utils import db_available from bob.bio.base.test.utils import db_available
@db_available('replay') # the name of the package @db_available("replay") # the name of the package
def test_replay(): def test_replay():
# replay-attack is the name of the entry point # replay-attack is the name of the entry point
replay_database_instance = bob.bio.base.load_resource( replay_database_instance = bob.bio.base.load_resource(
'replay-attack', "replay-attack",
'database', "database",
preferred_package='bob.pad.face', preferred_package="bob.pad.face",
package_prefix='bob.pad.' package_prefix="bob.pad.",
).database ).database
try: try:
assert len( assert (
len(replay_database_instance.objects(groups=["train", "dev", "eval"]))
== 1200
)
assert len(replay_database_instance.objects(groups=["train", "dev"])) == 720
assert len(replay_database_instance.objects(groups=["train"])) == 360
assert (
len(
replay_database_instance.objects( replay_database_instance.objects(
groups=['train', 'dev', 'eval'])) == 1200 groups=["train", "dev", "eval"], protocol="grandtest"
assert len( )
replay_database_instance.objects(groups=['train', 'dev'])) == 720 )
assert len(replay_database_instance.objects(groups=['train'])) == 360 == 1200
assert len( )
assert (
len(
replay_database_instance.objects( replay_database_instance.objects(
groups=['train', 'dev', 'eval'], protocol='grandtest')) == 1200 groups=["train", "dev", "eval"],
assert len( protocol="grandtest",
purposes="real",
)
)
== 200
)
assert (
len(
replay_database_instance.objects( replay_database_instance.objects(
groups=['train', 'dev', 'eval'], groups=["train", "dev", "eval"],
protocol='grandtest', protocol="grandtest",
purposes='real')) == 200 purposes="attack",
assert len( )
replay_database_instance.objects( )
groups=['train', 'dev', 'eval'], == 1000
protocol='grandtest', )
purposes='attack')) == 1000
except IOError as e: except IOError as e:
raise SkipTest( raise SkipTest(
"The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'" "The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'"
% e) % e
)
@db_available('replaymobile')
def test_replaymobile(): def test_replaymobile():
replaymobile = bob.bio.base.load_resource( database = bob.bio.base.load_resource(
'replay-mobile', "replay-mobile",
'database', "database",
preferred_package='bob.pad.face', preferred_package="bob.pad.face",
package_prefix='bob.pad.').database package_prefix="bob.pad.",
)
assert database.protocols() == ["grandtest", "mattescreen", "print"]
assert database.groups() == ["dev", "eval", "train"]
assert len(database.samples(groups=["train", "dev", "eval"])) == 1030
assert len(database.samples(groups=["train", "dev"])) == 728
assert len(database.samples(groups=["train"])) == 312
assert len(database.samples(groups=["train", "dev", "eval"])) == 1030
assert (
len(database.samples(groups=["train", "dev", "eval"], purposes="real")) == 390
)
assert (
len(database.samples(groups=["train", "dev", "eval"], purposes="attack")) == 640
)
sample = database.sort(database.samples())[0]
try: try:
assert sample.annotations["0"] == {
assert len( "bottomright": [760, 498],
replaymobile.objects(groups=['train', 'dev', 'eval'])) == 1030 "topleft": [374, 209],
assert len(replaymobile.objects(groups=['train', 'dev'])) == 728 "leye": [518, 417],
assert len(replaymobile.objects(groups=['train'])) == 312 "reye": [522, 291],
assert len( "mouthleft": [669, 308],
replaymobile.objects( "mouthright": [666, 407],
groups=['train', 'dev', 'eval'], protocol='grandtest')) == 1030 "nose": [585, 358],
assert len( }
replaymobile.objects( assert sample.data.shape == (20, 3, 720, 1280)