From 749c6799646ad5989bfdaa3d2b3204a39f26d9c1 Mon Sep 17 00:00:00 2001
From: Pavel Korshunov <pavel.korshunov@idiap.ch>
Date: Tue, 20 Sep 2016 13:44:01 +0200
Subject: [PATCH] Update the existing files to new DB interface

---
 MANIFEST.in                               |   5 +-
 bob/pad/base/__init__.py                  |   2 +-
 bob/pad/base/database/DatabaseBobSpoof.py | 217 ----------------------
 bob/pad/base/test/dummy/__init__.py       |   1 +
 bob/pad/base/test/dummy/database.py       |  30 ++-
 bob/pad/base/test/dummy/preprocessor.py   |   3 -
 bob/pad/base/tools/FileSelector.py        |  11 +-
 bob/pad/base/tools/command_line.py        |   4 +-
 bob/pad/base/tools/preprocessor.py        |  17 +-
 buildout.cfg                              |  24 +--
 requirements.txt                          |   1 -
 11 files changed, 61 insertions(+), 254 deletions(-)
 delete mode 100644 bob/pad/base/database/DatabaseBobSpoof.py

diff --git a/MANIFEST.in b/MANIFEST.in
index a42a0bd..fe2d7a5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,4 @@
-include README.rst bootstrap-buildout.py buildout.cfg COPYING version.txt requirements.txt
+include README.rst bootstrap-buildout.py buildout.cfg develop.cfg version.txt requirements.txt
 recursive-include doc *.py *.rst
-recursive-include bob/bio/base/test/data *-dev*
+recursive-include bob *.txt *.hdf5
+recursive-include bob *.sql3
diff --git a/bob/pad/base/__init__.py b/bob/pad/base/__init__.py
index e48cb43..722c959 100644
--- a/bob/pad/base/__init__.py
+++ b/bob/pad/base/__init__.py
@@ -1,6 +1,6 @@
+from . import database
 from . import algorithm
 from . import tools
-#from . import grid # only one file, not complete directory
 
 from . import script
 from . import test
diff --git a/bob/pad/base/database/DatabaseBobSpoof.py b/bob/pad/base/database/DatabaseBobSpoof.py
deleted file mode 100644
index 1df5406..0000000
--- a/bob/pad/base/database/DatabaseBobSpoof.py
+++ /dev/null
@@ -1,217 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Pavel Korshunov <pavel.korshunov@idiap.ch>
-# @date: Wed 19 Aug 13:43:21 2015
-#
-# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-from bob.bio.base.database.Database import Database
-import os
-
-import antispoofing.utils.db
-
-
-class DatabaseBobSpoof(Database):
-    """This class can be used whenever you have a database that follows the Bob
-    antispoofing database interface, which is defined in :py:class:`antispoofing.utils.db.Database`
-
-    **Parameters:**
-
-    database : derivative of :py:class:`antispoofing.utils.db.Database`
-      The database instance that provides the actual interface, see :ref:`antispoofing_databases` for a list.
-
-    all_files_options : dict
-      Dictionary of options passed to the :py:meth:`antispoofing.utils.db.Database.objects` database query when retrieving all data.
-
-    check_original_files_for_existence : bool
-      Enables to test for the original data files when querying the database.
-
-    kwargs : ``key=value`` pairs
-      The arguments of the :py:class:`Database` base class constructor.
-
-      .. note:: Usually, the ``name``, ``protocol`` keyword parameters of the base class constructor need to be specified.
-    """
-
-    def __init__(
-            self,
-            database,  # The bob database that is used
-            all_files_options={},  # additional options for the database query that can be used to extract all files
-            original_directory=None,  # the directory where the data files are located
-            check_original_files_for_existence=False,
-            **kwargs  # The default parameters of the base class
-    ):
-
-        Database.__init__(
-            self,
-            **kwargs
-        )
-
-        assert isinstance(database, antispoofing.utils.db.Database), \
-            "Only databases derived from antispoofing.utils.db.Database are supported by this interface. " \
-            "Please implement your own bob.bio.base.database.Database interface for anti-spoofing experiments."
-
-        self.database = database
-        if original_directory is None:
-            self.original_directory = database.original_directory
-        else:
-            self.original_directory = original_directory
-
-        self.all_files_options = all_files_options
-        self.check_existence = check_original_files_for_existence
-
-        self._kwargs = kwargs
-
-    def set_protocol(self, protocol):
-        """
-        Sets the protocol for the database. The protocol can be specified via command line to spoof.py
-        script with option -P
-        :param protocol: name of the protocol
-        :return: None
-        """
-        self.protocol = protocol
-        self.database.set_kwargs({'protocol': protocol})
-
-    def __str__(self):
-        """__str__() -> info
-
-        This function returns all parameters of this class (and its derived class).
-
-        **Returns:**
-
-        info : str
-          A string containing the full information of all parameters of this (and the derived) class.
-        """
-        params = ", ".join(["%s=%s" % (key, value) for key, value in self._kwargs.items()])
-        params += ", original_directory=%s" % (self.original_directory)
-        if self.all_files_options: params += ", all_files_options=%s" % self.all_files_options
-
-        return "%s(%s)" % (str(self.__class__), params)
-
-
-    def replace_directories(self, replacements=None):
-        """This helper function replaces the ``original_directory`` of the database with
-        the directory read from the given replacement file.
-
-        This function is provided for convenience, so that the database
-        configuration files do not need to be modified.
-        Instead, this function uses the given dictionary of replacements to change the original directory.
-
-        The given ``replacements`` can be of type ``dict``, including all replacements,
-        or a file name (as a ``str``), in which case the file is read.
-        The structure of the file should be:
-
-        .. code-block:: text
-
-           # Comments starting with # and empty lines are ignored
-
-           original/path/to/data = /path/to/your/data
-
-        **Parameters:**
-
-        replacements : dict or str
-          A dictionary with replacements, or a name of a file to read the dictionary from.
-          If the file name does not exist, no directories are replaced.
-        """
-        if replacements is None:
-            return
-        if isinstance(replacements, str):
-            if not os.path.exists(replacements):
-                return
-            # Open the database replacement file and reads its content
-            with open(replacements) as f:
-                replacements = {}
-                for line in f:
-                    if line.strip() and not line.startswith("#"):
-                        splits = line.split("=")
-                        assert len(splits) == 2
-                        replacements[splits[0].strip()] = splits[1].strip()
-
-        assert isinstance(replacements, dict)
-
-        if self.original_directory in replacements:
-            self.original_directory = replacements[self.original_directory]
-            self.database.original_directory = self.original_directory
-
-
-    def all_files(self, groups=('train', 'dev', 'eval')):
-        """all_files(groups=('train', 'dev', 'eval')) -> files
-
-        Returns all files of the database, respecting the current protocol.
-
-        **Parameters:**
-
-        groups : some of ``('train', 'dev', 'eval')`` or ``None``
-          The groups to get the data for.
-          If ``None``, data for all groups is returned.
-
-        **Returns:**
-
-        files : [:py:class:`antispoofing.utils.db.File`]
-          The sorted and unique list of all files of the database.
-        """
-        realset = []
-        attackset = []
-        if 'train' in groups:
-            real, attack = self.database.get_train_data()
-            realset += real
-            attackset += attack
-        if 'dev' in groups:
-            real, attack = self.database.get_devel_data()
-            realset += real
-            attackset += attack
-        if 'eval' in groups:
-            real, attack = self.database.get_test_data()
-            realset += real
-            attackset += attack
-        return [realset, attackset]
-
-    def training_files(self, step=None, arrange_by_client=False):
-        """training_files(step = None, arrange_by_client = False) -> files
-
-        Returns all training File objects
-        This function needs to be implemented in derived class implementations.
-
-        **Parameters:**
-            The parameters are not applicable in this version of anti-spoofing experiments
-
-        **Returns:**
-
-        files : [:py:class:`File`] or [[:py:class:`File`]]
-          The (arranged) list of files used for the training.
-        """
-        return self.database.get_train_data()
-
-    def original_file_names(self, files):
-        """original_file_names(files) -> paths
-
-        Returns the full paths of the real and attack data of the given File objects.
-
-        **Parameters:**
-
-        files : [[:py:class:`antispoofing.utils.db.File`], [:py:class:`antispoofing.utils.db.File`]]
-          The list of lists ([real, attack]]) of file object to retrieve the original data file names for.
-
-        **Returns:**
-
-        paths : [str]
-          The paths extracted for the concatenated real+attack files, in the preserved order.
-        """
-        realfiles = files[0]
-        attackfiles = files[1]
-        realpaths = [file.make_path(directory=self.original_directory, extension=self.original_extension) for file in
-                     realfiles]
-        attackpaths = [file.make_path(directory=self.original_directory, extension=self.original_extension) for file in
-                       attackfiles]
-        return realpaths + attackpaths
diff --git a/bob/pad/base/test/dummy/__init__.py b/bob/pad/base/test/dummy/__init__.py
index fc3eb09..0462398 100644
--- a/bob/pad/base/test/dummy/__init__.py
+++ b/bob/pad/base/test/dummy/__init__.py
@@ -1,4 +1,5 @@
 from . import database
+from . import database_sql
 from . import preprocessor
 from . import extractor
 from . import algorithm
diff --git a/bob/pad/base/test/dummy/database.py b/bob/pad/base/test/dummy/database.py
index 7294d24..a6e056e 100644
--- a/bob/pad/base/test/dummy/database.py
+++ b/bob/pad/base/test/dummy/database.py
@@ -21,8 +21,8 @@ import os
 import sys
 import six
 
-from bob.pad.db import PadFile
-from bob.pad.db import PadDatabase
+from bob.pad.base.database import PadFile
+from bob.pad.base.database import PadDatabase
 
 import bob.io.base
 from bob.db.base.driver import Interface as BaseInterface
@@ -35,6 +35,11 @@ dummy_train_list = ['train_real', 'train_attack']
 dummy_devel_list = ['dev_real', 'dev_attack']
 dummy_test_list = ['eval_real', 'eval_attack']
 
+dummy_data = {'train_real': 1.0, 'train_attack': 2.0,
+              'dev_real': 3.0, 'dev_attack': 4.0,
+              'eval_real': 5.0, 'eval_attack': 6.0}
+
+
 class TestFile(PadFile):
     def __init__(self, path, id):
         attack_type = None
@@ -42,6 +47,27 @@ class TestFile(PadFile):
             attack_type = "attack"
         PadFile.__init__(self, client_id=1, path=path, file_id=id, attack_type=attack_type)
 
+    def load(self, directory=None, extension='.hdf5'):
+        """Returns the dummy value registered in ``dummy_data`` for this file.
+        The path is only built to derive the lookup key from its base name.
+
+        Keyword Parameters:
+
+        directory
+          [optional] Passed to ``make_path`` (``''`` is used if empty or None);
+          the lookup only uses the base name of the resulting path.
+
+        extension
+          [optional] Passed to ``make_path`` (``''`` is used if empty or None).
+          NOTE(review): ``dummy_data`` keys carry no extension, so the '.hdf5'
+          default presumably raises ``KeyError`` - confirm against ``make_path``.
+
+        Raises ``KeyError`` if the base name of the path is not in ``dummy_data``.
+
+        """
+        # build the path, then use only its base name as the key into dummy_data
+        path = self.make_path(directory or '', extension or '')
+        return dummy_data[os.path.basename(path)]
 
 def dumplist(args):
     """Dumps lists of files based on your criteria"""
diff --git a/bob/pad/base/test/dummy/preprocessor.py b/bob/pad/base/test/dummy/preprocessor.py
index d6d76f5..e17e72b 100644
--- a/bob/pad/base/test/dummy/preprocessor.py
+++ b/bob/pad/base/test/dummy/preprocessor.py
@@ -33,8 +33,5 @@ class DummyPreprocessor(Preprocessor):
         """Does nothing, simply converts the data type of the data, ignoring any annotation."""
         return data
 
-    def read_original_data(self, original_file_name):
-        return dummy_data[os.path.basename(original_file_name)]
-
 
 preprocessor = DummyPreprocessor()
diff --git a/bob/pad/base/tools/FileSelector.py b/bob/pad/base/tools/FileSelector.py
index e5cf76b..ce058d2 100644
--- a/bob/pad/base/tools/FileSelector.py
+++ b/bob/pad/base/tools/FileSelector.py
@@ -107,9 +107,18 @@ class FileSelector:
 
     # List of files that will be used for all files
     def original_data_list(self, groups=None):
-        """Returns the tuple of lists of original (real, attack) data that can be used for preprocessing."""
+        """Returns the joint list of original (real and attack) file names."""
         return self.database.original_file_names(self.database.all_files(groups=groups))
 
+    def original_data_list_files(self, groups=None):
+        """Returns a tuple ``(files, original_directory, original_extension)``, where ``files`` is the joint list of original (real and attack) data files that can be used for preprocessing."""
+        files = self.database.all_files(groups=groups)
+        if len(files) != 2:  # anything but a two-element result is passed through unchanged
+            fileset = files
+        else:  # presumably a [real, attack] pair of lists; NOTE(review): a flat list of exactly two files also lands here - confirm
+            fileset = files[0]+files[1]
+        return fileset, self.database.original_directory, self.database.original_extension
+
     def preprocessed_data_list(self, groups=None):
         """Returns the tuple of lists (real, attacks) of preprocessed data files."""
         return self.get_paths(self.database.all_files(groups=groups), "preprocessed")
diff --git a/bob/pad/base/tools/command_line.py b/bob/pad/base/tools/command_line.py
index be370f2..ebdb2b4 100644
--- a/bob/pad/base/tools/command_line.py
+++ b/bob/pad/base/tools/command_line.py
@@ -26,6 +26,8 @@ import bob.core
 
 logger = bob.core.log.setup("bob.pad.base")
 
+from bob.pad.base.database import PadDatabase
+
 from bob.bio.base import utils
 from . import FileSelector
 from .. import database
@@ -264,7 +266,7 @@ def initialize(parsers, command_line_parameters=None, skips=[]):
     projector_sub_dir = extractor_sub_dir
 
     # Database directories, which should be automatically replaced
-    if isinstance(args.database, database.DatabaseBobSpoof):
+    if isinstance(args.database, PadDatabase):
         args.database.replace_directories(args.database_directories_file)
 
     # initialize the file selector
diff --git a/bob/pad/base/tools/preprocessor.py b/bob/pad/base/tools/preprocessor.py
index d3a1927..c4576e4 100644
--- a/bob/pad/base/tools/preprocessor.py
+++ b/bob/pad/base/tools/preprocessor.py
@@ -55,8 +55,10 @@ def preprocess(preprocessor, groups=None, indices=None, force=False):
     fs = FileSelector.instance()
 
     # get the file lists
-    data_files = fs.original_data_list(groups=groups)
+    data_files, original_directory, original_extension = fs.original_data_list_files(groups=groups)
     preprocessed_data_files = fs.preprocessed_data_list(groups=groups)
+    print("len of data files: %s" %(str(len(data_files))))
+    print("len of preprocessed data files (paths): %s" %(str(len(preprocessed_data_files))))
 
     # select a subset of keys to iterate
     if indices is not None:
@@ -71,18 +73,23 @@ def preprocess(preprocessor, groups=None, indices=None, force=False):
     # iterate over the selected files
     for i in index_range:
         preprocessed_data_file = str(preprocessed_data_files[i])
+        file_object = data_files[i]
+        file_name = file_object.make_path(original_directory, original_extension)
 
         # check for existence
         if not utils.check_file(preprocessed_data_file, force, 1000):
-            file_name = data_files[i]
-            data = preprocessor.read_original_data(file_name)
+            logger.info("... Processing original data file '%s'", file_name)
+            data = preprocessor.read_original_data(file_object, original_directory, original_extension)
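+            # create the output directory for the preprocessed file; NOTE(review): this runs after the original data was read, although creating it before reading is sometimes required when relative directories are specified (especially ones including a .. somewhere) - confirm the order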
+            bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
 
             # call the preprocessor
-            logger.info("- Preprocessor: processing file: %s", file_name)
             preprocessed_data = preprocessor(data, None)
+            if preprocessed_data is None:
+                logger.error("Preprocessing of file '%s' was not successful", file_name)
+                continue
 
             # write the data
-            bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
             preprocessor.write_data(preprocessed_data, preprocessed_data_file)
 
 
diff --git a/buildout.cfg b/buildout.cfg
index 7ff85b3..0b85449 100644
--- a/buildout.cfg
+++ b/buildout.cfg
@@ -1,31 +1,13 @@
 ; vim: set fileencoding=utf-8 :
-; Pavel Korshunov <Pavel.Korshunov@idiap.ch>
-; Wed 19 Aug 13:43:22 2015
+; Tue 16 Aug 15:00:20 CEST 2016
 
 [buildout]
 parts = scripts
+develop = .
 eggs = bob.pad.base
-       gridtk
-
 extensions = bob.buildout
-             mr.developer
-auto-checkout = *
-develop = src/bob.db.base
-          src/bob.bio.base
-          src/bob.bio.db
-          src/bob.pad.db
-          .
-         
-; options for bob.buildout
-debug = true
-verbose = true
 newest = false
-
-[sources]
-bob.db.base = git branch=refactoring_2016 git@github.com:bioidiap/bob.db.base.git
-bob.bio.base = git https://github.com/bioidiap/bob.bio.base
-bob.bio.db = git git@gitlab.idiap.ch:biometric/bob.bio.db.git
-bob.pad.db = git git@gitlab.idiap.ch:biometric/bob.pad.db.git
+verbose = true
 
 [scripts]
 recipe = bob.buildout:scripts
diff --git a/requirements.txt b/requirements.txt
index 2d0335a..b1790d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 setuptools
 bob.extension
-bob.io.base
 bob.db.base
 bob.bio.base
-- 
GitLab