From c7ad23c5a4fc11db943d64313ff04608436c8d48 Mon Sep 17 00:00:00 2001
From: Yannick DAYER <yannick.dayer@idiap.ch>
Date: Fri, 12 Mar 2021 16:29:26 +0100
Subject: [PATCH] Add a CSV database interface for replay-mobile

---
Reviewer note: the added lines of this patch were amended during review; the
blob ids on the "index" lines below were not recomputed.

 .../face/config/database/replaymobile_csv.py  |  21 ++
 bob/bio/face/database/replaymobile_csv.py     | 336 ++++++++++++++++++
 bob/bio/face/test/test_databases.py           |  14 +
 setup.py                                      |   9 +-
 4 files changed, 376 insertions(+), 4 deletions(-)
 create mode 100644 bob/bio/face/config/database/replaymobile_csv.py
 create mode 100644 bob/bio/face/database/replaymobile_csv.py

diff --git a/bob/bio/face/config/database/replaymobile_csv.py b/bob/bio/face/config/database/replaymobile_csv.py
new file mode 100644
index 00000000..556d576d
--- /dev/null
+++ b/bob/bio/face/config/database/replaymobile_csv.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Yannick Dayer <yannick.dayer@idiap.ch>
+
+"""
+  Replay-mobile CSV database interface instantiation
+"""
+
+from bob.bio.face.database.replaymobile_csv import ReplayMobileDatabase
+
+
+import bob.core
+
+logger = bob.core.log.setup("bob.bio.face")
+
+if 'protocol' not in locals():
+    logger.info("protocol not specified, using default: 'grandtest'")
+    protocol = "grandtest"
+
+logger.debug(f"Instantiation of ReplayMobile bio database with protocol '{protocol}'")
+database = ReplayMobileDatabase(protocol_name=protocol, protocol_definition_path="./csv_datasets/replay-mobile/") # TODO upload the csv files and remove this path.
diff --git a/bob/bio/face/database/replaymobile_csv.py b/bob/bio/face/database/replaymobile_csv.py
new file mode 100644
index 00000000..6484eb07
--- /dev/null
+++ b/bob/bio/face/database/replaymobile_csv.py
@@ -0,0 +1,336 @@
+#!/usr/bin/env python
+# Yannick Dayer <yannick.dayer@idiap.ch>
+
+from bob.bio.base.database import CSVDataset, CSVToSampleLoaderBiometrics
+from bob.pipelines.datasets.sample_loaders import AnnotationsLoader
+from bob.pipelines.sample import DelayedSample
+from bob.extension.download import get_file
+from sklearn.pipeline import make_pipeline
+from bob.io.video import reader
+from bob.extension import rc
+import bob.core
+
+import functools
+import os.path
+import numpy
+
+logger = bob.core.log.setup("bob.bio.face")
+
+
+def load_frame_from_file_replaymobile(file_name, frame, capturing_device):
+    """Loads a single frame from a video file for replay-mobile.
+
+    A particularity of the replay-mobile is the use of different devices for
+    capturing the videos ('mobile' and 'tablet'). The orientation of the
+    resulting videos differs for each device and is encoded in the metadata,
+    but bob.io.video ignores it. This function correctly rotates the returned
+    image, given the captured device's name.
+
+    This function uses the video reader utility that does not load the full
+    video in memory to just access one frame.
+
+    Parameters
+    ----------
+
+    file_name: str
+        The video file to load the frame from
+
+    frame: int
+        The frame index to load.
+
+    capturing_device: str
+        'mobile' devices' frames will be flipped vertically.
+
+    Returns
+    -------
+
+    image: 3D numpy array
+        The frame of the video in bob format (channel, height, width)
+    """
+    logger.debug(f"Extracting frame {frame} from '{file_name}'")
+    video_reader = reader(file_name)
+    image = video_reader[frame]
+    # Images captured by the 'mobile' device are flipped vertically.
+    # (Images were captured horizontally and bob.io.video does not read the
+    # metadata correctly, whether it was on the right or left side)
+    if capturing_device == "mobile":
+        image = numpy.flip(image, 2)
+    # Convert to bob format (channel, height, width)
+    image = numpy.transpose(image, (0, 2, 1))
+    return image
+
+
+def read_frame_annotation_file_replaymobile(file_name, frame):
+    """Returns the bounding-box for one frame of a video file of replay-mobile.
+
+    Given an annotation file location and a frame number, returns the bounding
+    box coordinates corresponding to the frame.
+
+    The replay-mobile annotation files are composed of 4 columns and N rows for
+    N frames of the video:
+
+        120 230 40 40
+        125 230 40 40
+        ...
+        <x> <y> <w> <h>
+
+    Parameters
+    ----------
+
+    file_name: str
+        The complete annotation file path and name (with extension).
+
+    frame: int
+        The video frame index.
+
+    Returns
+    -------
+
+    annotations: dict or None
+        ``topleft`` and ``bottomright`` as (y, x) tuples of floats, or None
+        when ``file_name`` is empty.
+
+    Raises
+    ------
+
+    IOError
+        If the annotation file does not exist.
+
+    IndexError
+        If the annotation file has no line for ``frame``.
+
+    ValueError
+        If the line for ``frame`` does not hold exactly 4 values.
+    """
+    logger.debug(f"Reading annotation file '{file_name}', frame {frame}.")
+    if not file_name:
+        return None
+
+    if not os.path.exists(file_name):
+        raise IOError(f"The annotation file '{file_name}' was not found")
+
+    with open(file_name, 'r') as f:
+        # One line is one frame, each line contains a bounding box coordinates
+        lines = f.readlines()
+
+    try:
+        line = lines[frame]
+    except IndexError:
+        raise IndexError(
+            f"The annotation file '{file_name}' has no line for frame {frame}"
+        ) from None
+
+    # Split on any whitespace: tabs, repeated spaces or a space before the
+    # end of the line must not produce spurious (empty) fields.
+    positions = line.split()
+
+    if len(positions) != 4:
+        raise ValueError(f"The content of '{file_name}' was not correct for frame {frame}")
+
+    x, y, width, height = (float(p) for p in positions)
+
+    # Coordinates are stored as (y, x) pairs
+    annotations = {
+        'topleft': (y, x),
+        'bottomright': (y + height, x + width),
+    }
+
+    return annotations
+
+
+class ReplayMobileCSVFrameSampleLoader(CSVToSampleLoaderBiometrics):
+    """A loader transformer returning a specific frame of a video file.
+
+    This is specifically tailored for replay-mobile. It uses a specific loader
+    that takes the capturing device as input.
+    """
+
+    def __init__(
+        self,
+        dataset_original_directory="",
+        extension="",
+        reference_id_equal_subject_id=True,
+    ):
+        super().__init__(
+            data_loader=None,
+            extension=extension,
+            dataset_original_directory=dataset_original_directory,
+        )
+        self.reference_id_equal_subject_id = reference_id_equal_subject_id
+
+    def convert_row_to_sample(self, row, header):
+        """Creates one sample given a row of the CSV protocol definition file.
+
+        The first three columns are the file path, the reference id and the
+        sample key. The remaining columns are passed as metadata, named after
+        their lower-cased header; ``frame`` and ``capturing_device`` must be
+        among them.
+        """
+        path = row[0]
+        reference_id = row[1]
+        key = row[2]
+
+        kwargs = {str(h).lower(): r for h, r in zip(header[3:], row[3:])}
+        if self.reference_id_equal_subject_id:
+            kwargs["subject_id"] = reference_id
+        elif "subject_id" not in kwargs:
+            raise ValueError(f"`subject_id` not available in {header}")
+
+        # Both columns are needed to locate and orient the frame
+        for column in ("frame", "capturing_device"):
+            if column not in kwargs:
+                raise ValueError(f"`{column}` not available in {header}")
+
+        return DelayedSample(
+            functools.partial(
+                load_frame_from_file_replaymobile,
+                file_name=os.path.join(self.dataset_original_directory, path + self.extension),
+                frame=int(kwargs["frame"]),
+                capturing_device=kwargs["capturing_device"],
+            ),
+            key=key,
+            path=path,
+            reference_id=reference_id,
+            **kwargs,
+        )
+
+
+class FrameBoundingBoxAnnotationLoader(AnnotationsLoader):
+    """A transformer that adds bounding-box to a sample from annotations files.
+
+    Parameters
+    ----------
+
+    annotation_directory: str or None
+        Root directory of the annotation files. If None, the samples are
+        returned without annotations.
+
+    annotation_extension: str
+        Extension appended to the sample path to get its annotation file.
+    """
+
+    def __init__(self,
+        annotation_directory=None,
+        annotation_extension=".face",
+        **kwargs
+    ):
+        super().__init__(
+            annotation_directory=annotation_directory,
+            annotation_extension=annotation_extension,
+            **kwargs
+        )
+
+    def transform(self, X):
+        """Adds the bounding-box annotations to a series of samples.
+        """
+        if self.annotation_directory is None:
+            # Keep the transformer contract (samples in, samples out) instead
+            # of handing None to whatever consumes the result.
+            logger.warning("No annotation directory set, samples are not annotated.")
+            return X
+
+        annotated_samples = []
+        for x in X:
+
+            # Build the path to the annotation files structure
+            annotation_file = os.path.join(
+                self.annotation_directory, x.path + self.annotation_extension
+            )
+
+            annotated_samples.append(
+                DelayedSample(
+                    x._load,
+                    parent=x,
+                    delayed_attributes=dict(
+                        annotations=functools.partial(
+                            read_frame_annotation_file_replaymobile,
+                            file_name=annotation_file,
+                            frame=int(x.frame),
+                        )
+                    ),
+                )
+            )
+
+        return annotated_samples
+
+
+class ReplayMobileDatabase(CSVDataset):
+    """Database interface that loads a csv definition for replay-mobile
+
+    Looks for the protocol definition files (structure of CSV files). If not
+    present, downloads them.
+    Then sets the data and annotation paths from __init__ parameters or from
+    the configuration (``bob config`` command).
+
+    Parameters
+    ----------
+
+    protocol_name: str
+        The protocol to use
+
+    protocol_definition_path: str or None
+        Specifies a path to download the database definition to.
+        If None: Downloads and uses the ``bob_data`` config.
+        (See :py:func:`bob.extension.download.get_file`)
+
+    data_path: str or None
+        Overrides the config-defined data location.
+        If None: uses the ``bob.db.replaymobile.directory`` config.
+        If None and the config does not exist, set as cwd.
+
+    annotation_path: str or None
+        Overrides the config-defined annotation files location.
+        If None: uses the ``bob.db.replaymobile.annotation_directory`` config.
+        If None and the config does not exist, set as
+        ``{data_path}/faceloc/rect``.
+    """
+
+    # NOTE(review): the configuration and the test added with this class use
+    # the 'grandtest' protocol; confirm that 'bio-grandtest' is a valid default.
+    def __init__(
+        self,
+        protocol_name="bio-grandtest",
+        protocol_definition_path=None,
+        data_path=None,
+        annotation_path=None,
+        **kwargs
+    ):
+        if protocol_definition_path is None:
+            # Downloading database description files if it is not specified
+            urls = [
+                "https://www.idiap.ch/software/bob/databases/latest/replay-mobile-csv.tar.gz",
+                "http://www.idiap.ch/software/bob/databases/latest/replay-mobile-csv.tar.gz",
+            ]
+            protocol_definition_path = get_file("replay-mobile-csv.tar.gz", urls)
+
+        if data_path is None:
+            # Defaults to cwd if config not defined
+            data_path = rc.get("bob.db.replaymobile.directory", "")
+
+        if annotation_path is None:
+            # Defaults to {data_path}/faceloc/rect if config not defined
+            annotation_path = rc.get(
+                "bob.db.replaymobile.annotation_directory",
+                os.path.join(data_path, "faceloc/rect/")
+            )
+
+        logger.info(f"Database: Loading database definition from '{protocol_definition_path}'.")
+        logger.info(f"Database: Defining data files path as '{data_path}'.")
+        logger.info(f"Database: Defining annotation files path as '{annotation_path}'.")
+        super().__init__(
+            protocol_definition_path,
+            protocol_name,
+            csv_to_sample_loader=make_pipeline(
+                ReplayMobileCSVFrameSampleLoader(
+                    dataset_original_directory=data_path,
+                    extension=".mov",
+                ),
+                FrameBoundingBoxAnnotationLoader(
+                    annotation_directory=annotation_path,
+                    annotation_extension=".face",
+                ),
+            ),
+            **kwargs
+        )
+        self.annotation_type = "bounding-box"
+        self.fixed_positions = None
diff --git a/bob/bio/face/test/test_databases.py b/bob/bio/face/test/test_databases.py
index 7c68ed4e..306a659d 100644
--- a/bob/bio/face/test/test_databases.py
+++ b/bob/bio/face/test/test_databases.py
@@ -271,6 +271,20 @@ def test_replay_spoof():
     )
 
 
+@db_available("replaymobile")
+def test_replaymobile_csv():
+    from bob.bio.face.database.replaymobile_csv import ReplayMobileDatabase # TODO: load resource instead
+    database = ReplayMobileDatabase("grandtest", protocol_definition_path="./csv_datasets", data_path="")
+    samples = database.all_samples(groups=("dev","eval"))
+    assert len(samples) == 8300, len(samples)
+    assert all(s.path for s in samples)
+    # assert samples[0].data.shape == (3, 1280, 720)# TODO data sample?
+    assert hasattr(samples[0], "annotations")
+    assert "topleft" in samples[0].annotations
+    assert "bottomright" in samples[0].annotations
+    assert hasattr(samples[0], "frame")
+
+
 @db_available("replaymobile")
 def test_replaymobile_licit():
     database = bob.bio.base.load_resource(
diff --git a/setup.py b/setup.py
index ded13e23..7c37af3e 100644
--- a/setup.py
+++ b/setup.py
@@ -105,10 +105,11 @@ setup(
             "mobio-all = bob.bio.face.config.database.mobio_all:database",
             "multipie = bob.bio.face.config.database.multipie:database",
             "multipie-pose = bob.bio.face.config.database.multipie_pose:database",
-            "replay-img-licit = bob.bio.face.config.database.replay:replay_licit",
-            "replay-img-spoof = bob.bio.face.config.database.replay:replay_spoof",
-            "replaymobile-img-licit = bob.bio.face.config.database.replaymobile:replaymobile_licit",
-            "replaymobile-img-spoof = bob.bio.face.config.database.replaymobile:replaymobile_spoof",
+            "replay-img-licit = bob.bio.face.config.database.replay_licit:database",
+            "replay-img-spoof = bob.bio.face.config.database.replay_spoof:database",
+            "replaymobile-img-licit = bob.bio.face.config.database.replaymobile_licit:database",
+            "replaymobile-img-spoof = bob.bio.face.config.database.replaymobile_spoof:database",
+            "replaymobile-img-csv = bob.bio.face.config.database.replaymobile_csv:database",
             "fargo = bob.bio.face.config.database.fargo:database",
             "meds = bob.bio.face.config.database.meds:database",
             "morph = bob.bio.face.config.database.morph:database",
-- 
GitLab