From c7ad23c5a4fc11db943d64313ff04608436c8d48 Mon Sep 17 00:00:00 2001
From: Yannick DAYER <yannick.dayer@idiap.ch>
Date: Fri, 12 Mar 2021 16:29:26 +0100
Subject: [PATCH] Add a CSV database interface for replay-mobile

---
 .../face/config/database/replaymobile_csv.py  |  21 ++
 bob/bio/face/database/replaymobile_csv.py     | 282 ++++++++++++++++++
 bob/bio/face/test/test_databases.py           |  14 +
 setup.py                                      |   9 +-
 4 files changed, 322 insertions(+), 4 deletions(-)
 create mode 100644 bob/bio/face/config/database/replaymobile_csv.py
 create mode 100644 bob/bio/face/database/replaymobile_csv.py

diff --git a/bob/bio/face/config/database/replaymobile_csv.py b/bob/bio/face/config/database/replaymobile_csv.py
new file mode 100644
index 00000000..556d576d
--- /dev/null
+++ b/bob/bio/face/config/database/replaymobile_csv.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Yannick Dayer <yannick.dayer@idiap.ch>
+
+"""
+  Replay-mobile CSV database interface instantiation
+"""
+
+from bob.bio.face.database.replaymobile_csv import ReplayMobileDatabase
+
+
+import bob.core
+
+logger = bob.core.log.setup("bob.bio.face")
+
+if 'protocol' not in locals():  # only when nothing defined `protocol` earlier in this scope
+    logger.info("protocol not specified, using default: 'grandtest'")
+    protocol = "grandtest"
+
+logger.debug(f"Instantiation of ReplayMobile bio database with protocol '{protocol}'")
+database = ReplayMobileDatabase(protocol_name=protocol, protocol_definition_path="./csv_datasets/replay-mobile/") # TODO: upload the csv files and remove this hard-coded relative path.
diff --git a/bob/bio/face/database/replaymobile_csv.py b/bob/bio/face/database/replaymobile_csv.py
new file mode 100644
index 00000000..6484eb07
--- /dev/null
+++ b/bob/bio/face/database/replaymobile_csv.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+# Yannick Dayer <yannick.dayer@idiap.ch>
+
+from bob.bio.base.database import CSVDataset, CSVToSampleLoaderBiometrics
+from bob.pipelines.datasets.sample_loaders import AnnotationsLoader
+from bob.pipelines.sample import DelayedSample
+from bob.extension.download import get_file
+from sklearn.pipeline import make_pipeline
+from bob.io.video import reader
+from bob.extension import rc
+import bob.core
+
+import functools
+import os.path
+import numpy
+
+logger = bob.core.log.setup("bob.bio.face")
+
+def load_frame_from_file_replaymobile(file_name, frame, capturing_device):
+    """Loads a single frame from a video file for replay-mobile.
+
+    A particularity of the replay-mobile is the use of different devices for
+    capturing the videos ('mobile' and 'tablet'). The orientation of the
+    resulting videos differs for each device and is encoded in the metadata,
+    but bob.io.video ignores it. This function correctly rotates the returned
+    image, given the capturing device's name.
+
+    This function uses the video reader utility that does not load the full
+    video in memory to just access one frame.
+
+    Parameters
+    ----------
+
+    file_name: str
+        The video file to load the frame from.
+
+    frame: int
+        The frame index to load.
+
+    capturing_device: str
+        Name of the capturing device; 'mobile' frames are flipped vertically.
+
+    Returns
+    -------
+
+    image: 3D numpy array
+        The frame of the video in bob format (channel, height, width)
+    """
+    logger.debug(f"Extracting frame {frame} from '{file_name}'")
+    video_reader = reader(file_name)
+    image = video_reader[frame]
+    # Images captured by the 'mobile' device are flipped vertically.
+    # (Images were captured horizontally and bob.io.video does not read the
+    #   metadata correctly, whether it was on the right or left side)
+    if capturing_device == "mobile":
+        image = numpy.flip(image, 2)  # reverse the order of elements along axis 2
+    # Convert to bob format (channel, height, width) by swapping axes 1 and 2
+    image = numpy.transpose(image, (0, 2, 1))
+    return image
+
+def read_frame_annotation_file_replaymobile(file_name, frame):
+    """Returns the bounding-box for one frame of a video file of replay-mobile.
+
+    Given an annotation file location and a frame number, returns the bounding
+    box of that frame: {'topleft': (y, x), 'bottomright': (y + h, x + w)}.
+
+    The replay-mobile annotation files are composed of 4 columns and N rows for
+    N frames of the video:
+
+    120 230 40 40
+    125 230 40 40
+    ...
+    <x> <y> <w> <h>
+
+    Parameters
+    ----------
+
+    file_name: str
+        The complete annotation file path and name (with extension).
+
+    frame: int
+        The video frame index (row index in the annotation file).
+    """
+    logger.debug(f"Reading annotation file '{file_name}', frame {frame}.")
+    if not file_name:  # no annotation file given: nothing to read
+        return None
+
+    if not os.path.exists(file_name):
+        raise IOError(f"The annotation file '{file_name}' was not found")
+
+    with open(file_name, 'r') as f:
+        # One line per frame; each line holds the bounding-box coordinates
+        line = f.readlines()[frame]  # reads the whole file; IndexError if frame is out of range
+
+    positions = line.split(' ')  # NOTE(review): repeated/trailing spaces yield extra fields; split() may be safer
+
+    if len(positions) != 4:
+        raise ValueError(f"The content of '{file_name}' was not correct for frame {frame}")
+
+    annotations = {
+        'topleft': (float(positions[1]), float(positions[0])),  # (y, x)
+        'bottomright':(  # (y + h, x + w)
+            float(positions[1])+float(positions[3]),
+            float(positions[0])+float(positions[2])
+        )
+    }
+
+    return annotations
+
+class ReplayMobileCSVFrameSampleLoader(CSVToSampleLoaderBiometrics):
+    """A loader transformer returning a specific frame of a video file.
+
+    This is specifically tailored for replay-mobile. Each sample is loaded by
+    ``load_frame_from_file_replaymobile``, which takes the capturing device.
+    """
+    def __init__(
+        self,
+        dataset_original_directory="",
+        extension="",
+        reference_id_equal_subject_id=True,
+    ):
+        super().__init__(
+            data_loader=None,  # not used by convert_row_to_sample below
+            extension=extension,
+            dataset_original_directory=dataset_original_directory,
+        )
+        self.reference_id_equal_subject_id = reference_id_equal_subject_id
+
+    def convert_row_to_sample(self, row, header):
+        """Creates one sample given a row of the CSV protocol definition file.
+        """
+        path = row[0]  # joined with the dataset directory and extension below
+        reference_id = row[1]
+        id = row[2] # Will be used as 'key' (NOTE(review): shadows the builtin `id`)
+
+        kwargs = dict([[str(h).lower(), r] for h, r in zip(header[3:], row[3:])])  # extra columns, lower-cased names
+        if self.reference_id_equal_subject_id:
+            kwargs["subject_id"] = reference_id
+        else:
+            if "subject_id" not in kwargs:
+                raise ValueError(f"`subject_id` not available in {header}")
+
+        return DelayedSample(
+            functools.partial(
+                load_frame_from_file_replaymobile,
+                file_name=os.path.join(self.dataset_original_directory, path + self.extension),
+                frame=int(kwargs["frame"]),  # KeyError if the CSV has no 'frame' column
+                capturing_device=kwargs["capturing_device"],  # KeyError if the column is missing
+            ),
+            key=id,
+            path=path,
+            reference_id=reference_id,
+            **kwargs,
+        )
+
+
+class FrameBoundingBoxAnnotationLoader(AnnotationsLoader):
+    """A transformer that adds bounding-boxes to samples from annotation files.
+
+    Parameters
+    ----------
+    annotation_directory: str or None
+        Directory of the annotation files. If None, ``transform`` returns None.
+    """
+    def __init__(self,
+        annotation_directory=None,
+        annotation_extension=".face",
+        **kwargs
+    ):
+        super().__init__(
+            annotation_directory=annotation_directory,
+            annotation_extension=annotation_extension,
+            **kwargs
+        )
+
+    def transform(self, X):
+        """Adds the bounding-box annotations to a series of samples.
+        """
+        if self.annotation_directory is None:
+            return None  # NOTE(review): the samples in X are dropped, not passed through
+
+        annotated_samples = []
+        for x in X:
+
+            # Build the path of this sample's annotation file
+            annotation_file = os.path.join(
+                self.annotation_directory, x.path + self.annotation_extension
+            )
+
+            annotated_samples.append(
+                DelayedSample(
+                    x._load,  # hand over the parent's `_load` callable unchanged
+                    parent=x,
+                    delayed_attributes=dict(
+                        annotations=functools.partial(  # partial: the file is not read here
+                            read_frame_annotation_file_replaymobile,
+                            file_name=annotation_file,
+                            frame=int(x.frame),
+                        )
+                    ),
+                )
+            )
+
+        return annotated_samples
+
+class ReplayMobileDatabase(CSVDataset):
+    """Database interface that loads a csv definition for replay-mobile
+
+    Looks for the protocol definition files (structure of CSV files). If no
+    path to them is given, fetches them with ``get_file``.
+    Then sets the data and annotation paths from __init__ parameters or from
+    the configuration (``bob config`` command).
+
+    Parameters
+    ----------
+
+    protocol_name: str
+        The protocol to use (defaults to ``"bio-grandtest"``).
+
+    protocol_definition_path: str or None
+        Specifies the path to load the database definition from.
+        If None: Downloads it and uses the ``bob_data`` config.
+        (See :py:func:`bob.extension.download.get_file`)
+
+    data_path: str or None
+        Overrides the config-defined data location.
+        If None: uses the ``bob.db.replaymobile.directory`` config.
+        If None and the config does not exist, set as cwd.
+
+    annotation_path: str or None
+        Overrides the config-defined annotation files location.
+        If None: uses the ``bob.db.replaymobile.annotation_directory`` config.
+        If None and the config does not exist, set as
+        ``{data_path}/faceloc/rect``.
+    """
+    def __init__(
+        self,
+        protocol_name="bio-grandtest",
+        protocol_definition_path=None,
+        data_path=None,
+        annotation_path=None,
+        **kwargs
+    ):
+        if protocol_definition_path is None:
+            # Download the database description files when no path is specified
+            urls = [
+                "https://www.idiap.ch/software/bob/databases/latest/replay-mobile-csv.tar.gz",
+                "http://www.idiap.ch/software/bob/databases/latest/replay-mobile-csv.tar.gz",
+            ]
+            protocol_definition_path = get_file("replay-mobile-csv.tar.gz", urls)
+
+        if data_path is None:
+            # Defaults to "" (i.e. paths relative to the cwd) if config not defined
+            data_path = rc.get("bob.db.replaymobile.directory", "")
+
+        if annotation_path is None:
+            # Defaults to {data_path}/faceloc/rect if config not defined
+            annotation_path = rc.get(
+                "bob.db.replaymobile.annotation_directory",
+                os.path.join(data_path, "faceloc/rect/")
+            )
+
+        logger.info(f"Database: Loading database definition from '{protocol_definition_path}'.")
+        logger.info(f"Database: Defining data files path as '{data_path}'.")
+        logger.info(f"Database: Defining annotation files path as '{annotation_path}'.")
+        super().__init__(
+            protocol_definition_path,
+            protocol_name,
+            # Two chained steps: build frame samples, then attach annotations
+            csv_to_sample_loader=make_pipeline(
+                ReplayMobileCSVFrameSampleLoader(
+                    dataset_original_directory=data_path,
+                    extension=".mov",
+                ),
+                FrameBoundingBoxAnnotationLoader(
+                    annotation_directory=annotation_path,
+                    annotation_extension=".face",
+                ),
+            ),
+            **kwargs
+        )
+        self.annotation_type = "bounding-box"
+        self.fixed_positions = None
diff --git a/bob/bio/face/test/test_databases.py b/bob/bio/face/test/test_databases.py
index 7c68ed4e..306a659d 100644
--- a/bob/bio/face/test/test_databases.py
+++ b/bob/bio/face/test/test_databases.py
@@ -271,6 +271,20 @@ def test_replay_spoof():
         )
 
 
+@db_available("replaymobile")
+def test_replaymobile_csv():
+    from bob.bio.face.database.replaymobile_csv import ReplayMobileDatabase # TODO: load resource instead
+    database = ReplayMobileDatabase("grandtest", protocol_definition_path="./csv_datasets", data_path="")  # NOTE(review): config uses "./csv_datasets/replay-mobile/"
+    samples = database.all_samples(groups=("dev","eval"))
+    assert len(samples) == 8300, len(samples)  # expected total over the dev and eval groups
+    assert all([s.path for s in samples])  # every sample has a non-empty path
+    # assert samples[0].data.shape == (3, 1280, 720)# TODO data sample?
+    assert hasattr(samples[0], "annotations")
+    assert "topleft" in samples[0].annotations
+    assert "bottomright" in samples[0].annotations
+    assert hasattr(samples[0], "frame")
+
+
 @db_available("replaymobile")
 def test_replaymobile_licit():
     database = bob.bio.base.load_resource(
diff --git a/setup.py b/setup.py
index ded13e23..7c37af3e 100644
--- a/setup.py
+++ b/setup.py
@@ -105,10 +105,11 @@ setup(
             "mobio-all        = bob.bio.face.config.database.mobio_all:database",
             "multipie          = bob.bio.face.config.database.multipie:database",
             "multipie-pose     = bob.bio.face.config.database.multipie_pose:database",
-            "replay-img-licit  = bob.bio.face.config.database.replay:replay_licit",
-            "replay-img-spoof  = bob.bio.face.config.database.replay:replay_spoof",
-            "replaymobile-img-licit  = bob.bio.face.config.database.replaymobile:replaymobile_licit",
-            "replaymobile-img-spoof  = bob.bio.face.config.database.replaymobile:replaymobile_spoof",
+            "replay-img-licit  = bob.bio.face.config.database.replay_licit:database",
+            "replay-img-spoof  = bob.bio.face.config.database.replay_spoof:database",
+            "replaymobile-img-licit  = bob.bio.face.config.database.replaymobile_licit:database",
+            "replaymobile-img-spoof  = bob.bio.face.config.database.replaymobile_spoof:database",
+            "replaymobile-img-csv    = bob.bio.face.config.database.replaymobile_csv:database",
             "fargo  = bob.bio.face.config.database.fargo:database",
             "meds = bob.bio.face.config.database.meds:database",
             "morph = bob.bio.face.config.database.morph:database",
-- 
GitLab