Commit 9a3f8488 authored by Amir MOHAMMADI
Browse files

WIP: port databases to the new csv format

parent fb28b9f4
Pipeline #48602 failed with stage
in 10 minutes and 58 seconds
...@@ -13,7 +13,10 @@ else: ...@@ -13,7 +13,10 @@ else:
# Preprocessor # # Preprocessor #
cropper = face_crop_solver( cropper = face_crop_solver(
cropped_image_size=64, cropped_positions=annotation_type, color_channel="gray" cropped_image_size=64,
cropped_positions=annotation_type,
color_channel="gray",
fixed_positions=fixed_positions,
) )
preprocessor = VideoWrapper(cropper) preprocessor = VideoWrapper(cropper)
preprocessor = mario.wrap( preprocessor = mario.wrap(
......
...@@ -13,6 +13,5 @@ the link. ...@@ -13,6 +13,5 @@ the link.
.. include:: links.rst .. include:: links.rst
""" """
from bob.pad.face.database import ReplayMobilePadDatabase from bob.pad.face.database import ReplayMobilePadDatabase
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector
database = DatabaseConnector(ReplayMobilePadDatabase()) database = ReplayMobilePadDatabase()
from .database import VideoPadFile from .database import VideoPadFile, VideoPadSample
from .replay import ReplayPadDatabase from .replay import ReplayPadDatabase
from .replay_mobile import ReplayMobilePadDatabase from .replay_mobile import ReplayMobilePadDatabase
from .mifs import MIFSPadDatabase from .mifs import MIFSPadDatabase
......
from functools import partial
import os
from bob.pad.base.database import PadFile from bob.pad.base.database import PadFile
import bob.bio.video import bob.bio.video
import bob.io.video import bob.io.video
from bob.db.base.annotations import read_annotation_file from bob.db.base.annotations import read_annotation_file
from sklearn.preprocessing import FunctionTransformer
from bob.bio.video import VideoAsArray
from bob.pipelines import DelayedSample
def delayed_video_load(
    samples,
    original_directory,
    annotation_directory=None,
    selection_style=None,
    max_number_of_frames=None,
    step_size=None,
    get_transform=None,
):
    """Wrap every sample in a ``DelayedSample`` that loads its video lazily.

    Nothing is read from disk here: the video loader and the optional
    annotation reader are both stored as ``functools.partial`` objects.

    Parameters
    ----------
    samples : iterable
        Objects exposing a ``filename`` attribute.
    original_directory : str or None
        Prefix joined to ``sample.filename`` to build the video path. A falsy
        value is replaced by the empty string.
    annotation_directory : str or None
        When truthy, each result gets a delayed ``annotations`` attribute that
        calls ``read_annotation_file`` on
        ``"{annotation_directory}:{filename without extension}.json"``.
    selection_style, max_number_of_frames, step_size
        Forwarded unchanged to ``VideoAsArray``.
    get_transform : callable or None
        Called once per sample; its return value is forwarded to
        ``VideoAsArray`` as ``transform``. When omitted, ``transform`` is
        ``None`` for every sample.

    Returns
    -------
    list
        One ``DelayedSample`` per input sample, in input order.
    """
    video_root = original_directory or ""
    annotation_root = annotation_directory or ""

    delayed_samples = []
    for sample in samples:
        video_path = os.path.join(video_root, sample.filename)
        transform = get_transform(sample) if get_transform is not None else None
        load_video = partial(
            VideoAsArray,
            path=video_path,
            selection_style=selection_style,
            max_number_of_frames=max_number_of_frames,
            step_size=step_size,
            transform=transform,
        )

        lazy_attributes = None
        if annotation_root:
            stem, _ = os.path.splitext(sample.filename)
            # NOTE(review): the "directory:member" form presumably addresses a
            # file inside an archive -- confirm against read_annotation_file.
            lazy_attributes = {
                "annotations": partial(
                    read_annotation_file,
                    file_name=f"{annotation_root}:{stem}.json",
                    annotation_type="json",
                )
            }

        delayed_samples.append(
            DelayedSample(
                load_video,
                parent=sample,
                delayed_attributes=lazy_attributes,
                # Always passed as a plain None, exactly as before; the lazy
                # reader (if any) travels in ``delayed_attributes``.
                annotations=None,
            )
        )
    return delayed_samples
def VideoPadSample(
    original_directory,
    annotation_directory=None,
    selection_style=None,
    max_number_of_frames=None,
    step_size=None,
    get_transform=None,
):
    """Build a transformer that applies ``delayed_video_load`` to samples.

    All arguments are stored as keyword arguments of the returned
    ``FunctionTransformer`` and handed to ``delayed_video_load`` unchanged.

    Parameters
    ----------
    original_directory : str or None
        Directory prefix for the video files.
    annotation_directory : str or None
        Location of the annotation files, if any.
    selection_style, max_number_of_frames, step_size
        Frame-selection options passed through to the loader.
    get_transform : callable or None
        Per-sample transform factory passed through to the loader.

    Returns
    -------
    sklearn.preprocessing.FunctionTransformer
        Transformer wrapping ``delayed_video_load`` with ``validate=False``.
    """
    loader_options = {
        "original_directory": original_directory,
        "annotation_directory": annotation_directory,
        "selection_style": selection_style,
        "max_number_of_frames": max_number_of_frames,
        "step_size": step_size,
        "get_transform": get_transform,
    }
    return FunctionTransformer(
        delayed_video_load, validate=False, kw_args=loader_options
    )
class VideoPadFile(PadFile): class VideoPadFile(PadFile):
......
#!/usr/bin/env python2 import logging
# -*- coding: utf-8 -*- import os
# Used in ReplayMobilePadFile class import numpy as np
from bob.pad.base.database import PadDatabase
from bob.pad.face.database import VideoPadFile
from bob.pad.face.utils import number_of_frames
from bob.db.base.annotations import read_annotation_file
from bob.extension import rc from bob.extension import rc
from bob.pad.base.database import FileListPadDatabase
REPLAYMOBILE_FRAME_SHAPE = (3, 1280, 720) from bob.pad.face.database import VideoPadSample
from bob.pipelines.transformers import Str_To_Types, str_to_bool
from sklearn.pipeline import make_pipeline
class ReplayMobilePadFile(VideoPadFile): from bob.extension.download import get_file
"""
A high level implementation of the File class for the Replay-Mobile logger = logging.getLogger(__name__)
database.
"""
def get_rm_video_transform(sample):
def __init__(self, f): should_flip = sample.should_flip
"""
Parameters def transform(video):
---------- video = np.asarray(video)
f : object video = np.rollaxis(video, -1, -2)
An instance of the File class defined in the low level db interface if should_flip:
of the Replay-Mobile database, in the bob.db.replaymobile.models.py video = video[..., ::-1, :]
file. return video
"""
return transform
self.f = f
# this f is actually an instance of the File class that is defined in
# bob.db.replaymobile.models and the PadFile class here needs def ReplayMobilePadDatabase(
# client_id, path, attack_type, file_id for initialization. We have to protocol="grandtest",
# convert information here and provide them to PadFile. attack_type is selection_style=None,
# a little tricky to get here. Based on the documentation of PadFile: max_number_of_frames=None,
# In cased of a spoofed data, this parameter should indicate what kind step_size=None,
# of spoofed attack it is. The default None value is interpreted that annotation_directory=None,
# the PadFile is a genuine or real sample. annotation_type=None,
if f.is_real(): fixed_positions=None,
attack_type = None **kwargs,
else: ):
attack_type = 'attack' dataset_protocols_path = os.path.expanduser(
# attack_type is a string and I decided to make it like this for this "~/temp/bob_data/datasets/pad-face-replay-mobile.tar.gz"
# particular database. You can do whatever you want for your own )
# database. if annotation_directory is None:
annotation_directory = get_file(
super(ReplayMobilePadFile, self).__init__( "replaymobile-mtcnn-annotations.tar.xz",
client_id=f.client_id, [
path=f.path, "http://www.idiap.ch/software/bob/data/bob/bob.db.replaymobile/master/replaymobile-mtcnn-annotations.tar.xz"
attack_type=attack_type, ],
file_id=f.id) )
annotation_type = "eyes-center"
def load(self): transformer = make_pipeline(
""" Str_To_Types(fieldtypes=dict(should_flip=str_to_bool)),
Overridden version of the load method defined in the ``VideoPadFile``. VideoPadSample(
original_directory=rc.get("bob.db.replaymobile.directory"),
Returns
-------
video_data : :any:`bob.bio.video.VideoAsArray`
Video data.
"""
# TODO(amir): Handle loading with VideoAsArray and with a transform as
# some video files need to be flipped.
video_data_array = self.f.load(
directory=self.original_directory, extension=self.original_extension)
return video_data_array
@property
def annotations(self):
from bob.db.replaymobile.models import replaymobile_annotations
if self.annotation_directory is not None:
# return the external annotations
annotations = read_annotation_file(
self.make_path(self.annotation_directory,
self.annotation_extension),
self.annotation_type)
return annotations
# return original annotations
return replaymobile_annotations(self.f, self.original_directory)
@property
def frames(self):
from bob.db.replaymobile.models import replaymobile_frames
return replaymobile_frames(self.f, self.original_directory)
@property
def number_of_frames(self):
vfilename = self.make_path(
directory=self.original_directory,
extension='.mov')
return number_of_frames(vfilename)
@property
def frame_shape(self):
return REPLAYMOBILE_FRAME_SHAPE
class ReplayMobilePadDatabase(PadDatabase):
"""
A high level implementation of the Database class for the Replay-Mobile
database.
"""
def __init__(
self,
protocol='grandtest',
original_directory=rc['bob.db.replaymobile.directory'],
original_extension='.mov',
annotation_directory=None,
annotation_extension='.json',
annotation_type='json',
**kwargs):
"""
Parameters
----------
protocol : str or None
The name of the protocol that defines the default experimental
setup for this database. ``grandtest`` is the default protocol for
this database.
original_directory : str
The directory where the original data of the database are stored.
original_extension : str
The file name extension of the original data.
annotation_directory : str
If provided, the annotations will be read from this directory
instead of the default annotations that are provided.
annotation_extension : str
The extension of annotations when annotation_extension is provided.
**kwargs
The arguments of the :py:class:`bob.bio.base.database.BioDatabase`
base class constructor.
"""
from bob.db.replaymobile import Database as LowLevelDatabase
self.db = LowLevelDatabase()
# Since the high level API expects different group names than what the
# low level API offers, you need to convert them when necessary
self.low_level_group_names = (
'train', 'devel',
'test') # group names in the low-level database interface
self.high_level_group_names = (
'train', 'dev',
'eval') # names are expected to be like that in objects() function
# Always use super to call parent class methods.
super(ReplayMobilePadDatabase, self).__init__(
name='replay-mobile',
protocol=protocol,
original_directory=original_directory,
original_extension=original_extension,
annotation_directory=annotation_directory, annotation_directory=annotation_directory,
annotation_extension=annotation_extension, selection_style=selection_style,
annotation_type=annotation_type, max_number_of_frames=max_number_of_frames,
**kwargs) step_size=step_size,
get_transform=get_rm_video_transform,
@property ),
def original_directory(self): )
return self.db.original_directory database = FileListPadDatabase(
dataset_protocols_path,
@original_directory.setter protocol,
def original_directory(self, value): transformer=transformer,
self.db.original_directory = value **kwargs,
)
def objects(self, database.annotation_type = annotation_type
groups=None, database.fixed_positions = fixed_positions
protocol=None, return database
purposes=None,
model_ids=None,
**kwargs):
"""
This function returns lists of ReplayMobilePadFile objects, which
fulfill the given restrictions.
Parameters
----------
groups : str
OR a list of strings.
The groups of which the clients should be returned.
Usually, groups are one or more elements of
('train', 'dev', 'eval')
protocol : str
The protocol for which the clients should be retrieved.
The protocol is dependent on your database.
If you do not have protocols defined, just ignore this field.
purposes : str
OR a list of strings.
The purposes for which File objects should be retrieved.
Usually it is either 'real' or 'attack'.
model_ids
This parameter is not supported in PAD databases yet
**kwargs
Returns
-------
files : [ReplayMobilePadFile]
A list of ReplayMobilePadFile objects.
"""
# Convert group names to low-level group names here.
groups = self.convert_names_to_lowlevel(
groups, self.low_level_group_names, self.high_level_group_names)
# Since this database was designed for PAD experiments, nothing special
# needs to be done here.
files = self.db.objects(
protocol=protocol, groups=groups, cls=purposes, **kwargs)
files = [ReplayMobilePadFile(f) for f in files]
for f in files:
f.original_directory = self.original_directory
f.annotation_directory = self.annotation_directory
f.annotation_extension = self.annotation_extension
f.annotation_type = self.annotation_type
return files
def annotations(self, f):
"""
Return annotations for a given file object ``f``, which is an instance
of ``ReplayMobilePadFile`` defined in the HLDI of the Replay-Mobile DB.
The ``load()`` method of ``ReplayMobilePadFile`` class (see above)
returns a video, therefore this method returns bounding-box annotations
for each video frame. The annotations are returned as dictionary of
dictionaries.
If ``self.annotation_directory`` is not None, it will read the
annotations from there.
Parameters
----------
f : :any:`ReplayMobilePadFile`
An instance of :any:`ReplayMobilePadFile` defined above.
Returns
-------
annotations : :py:class:`dict`
A dictionary containing the annotations for each frame in the
video. Dictionary structure:
``annotations = {'1': frame1_dict, '2': frame1_dict, ...}``. Where
``frameN_dict = {'topleft': (row, col),'bottomright': (row, col)}``
is the dictionary defining the coordinates of the face bounding box
in frame N.
"""
return f.annotations
def frames(self, padfile):
"""Yields the frames of the padfile one by one.
Parameters
----------
padfile : :any:`ReplayMobilePadFile`
The high-level replay pad file
Yields
------
:any:`numpy.array`
A frame of the video. The size is (3, 1280, 720).
"""
return padfile.frames
def number_of_frames(self, padfile):
"""Returns the number of frames in a video file.
Parameters
----------
padfile : :any:`ReplayPadFile`
The high-level pad file
Returns
-------
int
The number of frames.
"""
return padfile.number_of_frames
@property
def frame_shape(self):
"""Returns the size of each frame in this database.
Returns
-------
(int, int, int)
The (#Channels, Height, Width) which is (3, 1280, 720).
"""
return REPLAYMOBILE_FRAME_SHAPE
...@@ -8,169 +8,206 @@ import bob.bio.base ...@@ -8,169 +8,206 @@ import bob.bio.base
from bob.bio.base.test.utils import db_available from bob.bio.base.test.utils import db_available
@db_available('replay') # the name of the package @db_available("replay") # the name of the package
def test_replay(): def test_replay():
# replay-attack is the name of the entry point # replay-attack is the name of the entry point
replay_database_instance = bob.bio.base.load_resource( replay_database_instance = bob.bio.base.load_resource(
'replay-attack', "replay-attack",
'database', "database",
preferred_package='bob.pad.face', preferred_package="bob.pad.face",
package_prefix='bob.pad.' package_prefix="bob.pad.",
).database ).database
try: try:
assert len( assert (
replay_database_instance.objects( len(replay_database_instance.objects(groups=["train", "dev", "eval"]))
groups=['train', 'dev', 'eval'])) == 1200 == 1200
assert len( )
replay_database_instance.objects(groups=['train', 'dev'])) == 720 assert len(replay_database_instance.objects(groups=["train", "dev"])) == 720
assert len(replay_database_instance.objects(groups=['train'])) == 360 assert len(replay_database_instance.objects(groups=["train"])) == 360
assert len( assert (
replay_database_instance.objects( len(
groups=['train', 'dev', 'eval'], protocol='grandtest')) == 1200 replay_database_instance.objects(
assert len( groups=["train", "dev", "eval"], protocol="grandtest"
replay_database_instance.objects( )
groups=['train', 'dev', 'eval'], )
protocol='grandtest', == 1200
purposes='real')) == 200 )
assert len( assert (
replay_database_instance.objects( len(
groups=['train', 'dev', 'eval'], replay_database_instance.objects(
protocol='grandtest', groups=["train", "dev", "eval"],
purposes='attack')) == 1000 protocol="grandtest",
purposes="real",
)
)
== 200
)
assert (
len(
replay_database_instance.objects(
groups=["train", "dev", "eval"],
protocol="grandtest",
purposes="attack",
)
)
== 1000
)
except IOError as e: except IOError as e:
raise SkipTest( raise SkipTest(
"The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'" "The database could not be queried; probably the db.sql3 file is missing. Here is the error: '%s'"
% e) % e
)
@db_available('replaymobile')
def test_replaymobile(): def test_replaymobile():
replaymobile = bob.bio.base.load_resource( database = bob.bio.base.load_resource(
'replay-mobile', "replay-mobile",
'database', "database",
preferred_package='bob.pad.face', preferred_package="bob.pad.face",
package_prefix='bob.pad.').database package_prefix="bob.pad.",
)
assert database.protocols() == ["grandtest", "mattescreen", "print"]
assert database.groups() == ["dev", "eval", "train"]
assert len(database.samples(groups=["train", "dev", "eval"])) == 1030
assert len(database.samples(groups=["train", "dev"])) == 728
assert len(database.samples(groups=["train"])) == 312
assert len(database.samples(groups=["train", "dev", "eval"])) == 1030
assert (
len(database.samples(groups=["train", "dev", "eval"], purposes="real")) == 390
)
assert (
len(database.samples(groups=["train", "dev", "eval"], purposes="attack")) == 640
)
sample = database.sort(database.samples())[0]
try: try:
assert sample.annotations["0"] == {
assert len( "bottomright": [760, 498],
replaymobile.objects(groups=['train', 'dev', 'eval'])) == 1030 "topleft": [374, 209],
assert len(replaymobile.objects(groups=['train', 'dev'])) == 728 "leye": [518, 417],
assert len(replaymobile.objects(groups=['train'])) == 312 "reye": [522, 291],
assert len( "mouthleft": [669, 308],
replaymobile.objects( "mouthright": [666, 407],
groups=['train', 'dev', 'eval'], protocol='grandtest')) == 1030 "nose": [585, 358],
assert len( }
replaymobile.objects( assert sample.data.shape == (20, 3, 720, 1280)
groups=['train', 'dev', 'eval'],