From 749c6799646ad5989bfdaa3d2b3204a39f26d9c1 Mon Sep 17 00:00:00 2001 From: Pavel Korshunov <pavel.korshunov@idiap.ch> Date: Tue, 20 Sep 2016 13:44:01 +0200 Subject: [PATCH] Update the existing files to new DB interface --- MANIFEST.in | 5 +- bob/pad/base/__init__.py | 2 +- bob/pad/base/database/DatabaseBobSpoof.py | 217 ---------------------- bob/pad/base/test/dummy/__init__.py | 1 + bob/pad/base/test/dummy/database.py | 30 ++- bob/pad/base/test/dummy/preprocessor.py | 3 - bob/pad/base/tools/FileSelector.py | 11 +- bob/pad/base/tools/command_line.py | 4 +- bob/pad/base/tools/preprocessor.py | 17 +- buildout.cfg | 24 +-- requirements.txt | 1 - 11 files changed, 61 insertions(+), 254 deletions(-) delete mode 100644 bob/pad/base/database/DatabaseBobSpoof.py diff --git a/MANIFEST.in b/MANIFEST.in index a42a0bd..fe2d7a5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ -include README.rst bootstrap-buildout.py buildout.cfg COPYING version.txt requirements.txt +include README.rst bootstrap-buildout.py buildout.cfg develop.cfg version.txt requirements.txt recursive-include doc *.py *.rst -recursive-include bob/bio/base/test/data *-dev* +recursive-include bob *.txt *.hdf5 +recursive-include bob *.sql3 diff --git a/bob/pad/base/__init__.py b/bob/pad/base/__init__.py index e48cb43..722c959 100644 --- a/bob/pad/base/__init__.py +++ b/bob/pad/base/__init__.py @@ -1,6 +1,6 @@ +from . import database from . import algorithm from . import tools -#from . import grid # only one file, not complete directory from . import script from . 
import test diff --git a/bob/pad/base/database/DatabaseBobSpoof.py b/bob/pad/base/database/DatabaseBobSpoof.py deleted file mode 100644 index 1df5406..0000000 --- a/bob/pad/base/database/DatabaseBobSpoof.py +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Pavel Korshunov <pavel.korshunov@idiap.ch> -# @date: Wed 19 Aug 13:43:21 2015 -# -# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -from bob.bio.base.database.Database import Database -import os - -import antispoofing.utils.db - - -class DatabaseBobSpoof(Database): - """This class can be used whenever you have a database that follows the Bob - antispoofing database interface, which is defined in :py:class:`antispoofing.utils.db.Database` - - **Parameters:** - - database : derivative of :py:class:`antispoofing.utils.db.Database` - The database instance that provides the actual interface, see :ref:`antispoofing_databases` for a list. - - all_files_options : dict - Dictionary of options passed to the :py:meth:`antispoofing.utils.db.Database.objects` database query when retrieving all data. - - check_original_files_for_existence : bool - Enables to test for the original data files when querying the database. - - kwargs : ``key=value`` pairs - The arguments of the :py:class:`Database` base class constructor. - - .. 
note:: Usually, the ``name``, ``protocol`` keyword parameters of the base class constructor need to be specified. - """ - - def __init__( - self, - database, # The bob database that is used - all_files_options={}, # additional options for the database query that can be used to extract all files - original_directory=None, # the directory where the data files are located - check_original_files_for_existence=False, - **kwargs # The default parameters of the base class - ): - - Database.__init__( - self, - **kwargs - ) - - assert isinstance(database, antispoofing.utils.db.Database), \ - "Only databases derived from antispoofing.utils.db.Database are supported by this interface. " \ - "Please implement your own bob.bio.base.database.Database interface for anti-spoofing experiments." - - self.database = database - if original_directory is None: - self.original_directory = database.original_directory - else: - self.original_directory = original_directory - - self.all_files_options = all_files_options - self.check_existence = check_original_files_for_existence - - self._kwargs = kwargs - - def set_protocol(self, protocol): - """ - Sets the protocol for the database. The protocol can be specified via command line to spoof.py - script with option -P - :param protocol: name of the protocol - :return: None - """ - self.protocol = protocol - self.database.set_kwargs({'protocol': protocol}) - - def __str__(self): - """__str__() -> info - - This function returns all parameters of this class (and its derived class). - - **Returns:** - - info : str - A string containing the full information of all parameters of this (and the derived) class. 
- """ - params = ", ".join(["%s=%s" % (key, value) for key, value in self._kwargs.items()]) - params += ", original_directory=%s" % (self.original_directory) - if self.all_files_options: params += ", all_files_options=%s" % self.all_files_options - - return "%s(%s)" % (str(self.__class__), params) - - - def replace_directories(self, replacements=None): - """This helper function replaces the ``original_directory`` of the database with - the directory read from the given replacement file. - - This function is provided for convenience, so that the database - configuration files do not need to be modified. - Instead, this function uses the given dictionary of replacements to change the original directory. - - The given ``replacements`` can be of type ``dict``, including all replacements, - or a file name (as a ``str``), in which case the file is read. - The structure of the file should be: - - .. code-block:: text - - # Comments starting with # and empty lines are ignored - - original/path/to/data = /path/to/your/data - - **Parameters:** - - replacements : dict or str - A dictionary with replacements, or a name of a file to read the dictionary from. - If the file name does not exist, no directories are replaced. 
- """ - if replacements is None: - return - if isinstance(replacements, str): - if not os.path.exists(replacements): - return - # Open the database replacement file and reads its content - with open(replacements) as f: - replacements = {} - for line in f: - if line.strip() and not line.startswith("#"): - splits = line.split("=") - assert len(splits) == 2 - replacements[splits[0].strip()] = splits[1].strip() - - assert isinstance(replacements, dict) - - if self.original_directory in replacements: - self.original_directory = replacements[self.original_directory] - self.database.original_directory = self.original_directory - - - def all_files(self, groups=('train', 'dev', 'eval')): - """all_files(groups=('train', 'dev', 'eval')) -> files - - Returns all files of the database, respecting the current protocol. - - **Parameters:** - - groups : some of ``('train', 'dev', 'eval')`` or ``None`` - The groups to get the data for. - If ``None``, data for all groups is returned. - - **Returns:** - - files : [:py:class:`antispoofing.utils.db.File`] - The sorted and unique list of all files of the database. - """ - realset = [] - attackset = [] - if 'train' in groups: - real, attack = self.database.get_train_data() - realset += real - attackset += attack - if 'dev' in groups: - real, attack = self.database.get_devel_data() - realset += real - attackset += attack - if 'eval' in groups: - real, attack = self.database.get_test_data() - realset += real - attackset += attack - return [realset, attackset] - - def training_files(self, step=None, arrange_by_client=False): - """training_files(step = None, arrange_by_client = False) -> files - - Returns all training File objects - This function needs to be implemented in derived class implementations. - - **Parameters:** - The parameters are not applicable in this version of anti-spoofing experiments - - **Returns:** - - files : [:py:class:`File`] or [[:py:class:`File`]] - The (arranged) list of files used for the training. 
- """ - return self.database.get_train_data() - - def original_file_names(self, files): - """original_file_names(files) -> paths - - Returns the full paths of the real and attack data of the given File objects. - - **Parameters:** - - files : [[:py:class:`antispoofing.utils.db.File`], [:py:class:`antispoofing.utils.db.File`]] - The list of lists ([real, attack]]) of file object to retrieve the original data file names for. - - **Returns:** - - paths : [str] - The paths extracted for the concatenated real+attack files, in the preserved order. - """ - realfiles = files[0] - attackfiles = files[1] - realpaths = [file.make_path(directory=self.original_directory, extension=self.original_extension) for file in - realfiles] - attackpaths = [file.make_path(directory=self.original_directory, extension=self.original_extension) for file in - attackfiles] - return realpaths + attackpaths diff --git a/bob/pad/base/test/dummy/__init__.py b/bob/pad/base/test/dummy/__init__.py index fc3eb09..0462398 100644 --- a/bob/pad/base/test/dummy/__init__.py +++ b/bob/pad/base/test/dummy/__init__.py @@ -1,4 +1,5 @@ from . import database +from . import database_sql from . import preprocessor from . import extractor from . 
import algorithm diff --git a/bob/pad/base/test/dummy/database.py b/bob/pad/base/test/dummy/database.py index 7294d24..a6e056e 100644 --- a/bob/pad/base/test/dummy/database.py +++ b/bob/pad/base/test/dummy/database.py @@ -21,8 +21,8 @@ import os import sys import six -from bob.pad.db import PadFile -from bob.pad.db import PadDatabase +from bob.pad.base.database import PadFile +from bob.pad.base.database import PadDatabase import bob.io.base from bob.db.base.driver import Interface as BaseInterface @@ -35,6 +35,11 @@ dummy_train_list = ['train_real', 'train_attack'] dummy_devel_list = ['dev_real', 'dev_attack'] dummy_test_list = ['eval_real', 'eval_attack'] +dummy_data = {'train_real': 1.0, 'train_attack': 2.0, + 'dev_real': 3.0, 'dev_attack': 4.0, + 'eval_real': 5.0, 'eval_attack': 6.0} + + class TestFile(PadFile): def __init__(self, path, id): attack_type = None @@ -42,6 +47,27 @@ class TestFile(PadFile): attack_type = "attack" PadFile.__init__(self, client_id=1, path=path, file_id=id, attack_type=attack_type) + def load(self, directory=None, extension='.hdf5'): + """Loads the data at the specified location and using the given extension. + Override it if you need to load differently. + + Keyword Parameters: + + data + The data blob to be saved (normally a :py:class:`numpy.ndarray`). + + directory + [optional] If not empty or None, this directory is prefixed to the final + file destination + + extension + [optional] The extension of the filename - this will control the type of + output and the codec for saving the input blob. 
+ + """ + # get the path + path = self.make_path(directory or '', extension or '') + return dummy_data[os.path.basename(path)] def dumplist(args): """Dumps lists of files based on your criteria""" diff --git a/bob/pad/base/test/dummy/preprocessor.py b/bob/pad/base/test/dummy/preprocessor.py index d6d76f5..e17e72b 100644 --- a/bob/pad/base/test/dummy/preprocessor.py +++ b/bob/pad/base/test/dummy/preprocessor.py @@ -33,8 +33,5 @@ class DummyPreprocessor(Preprocessor): """Does nothing, simply converts the data type of the data, ignoring any annotation.""" return data - def read_original_data(self, original_file_name): - return dummy_data[os.path.basename(original_file_name)] - preprocessor = DummyPreprocessor() diff --git a/bob/pad/base/tools/FileSelector.py b/bob/pad/base/tools/FileSelector.py index e5cf76b..ce058d2 100644 --- a/bob/pad/base/tools/FileSelector.py +++ b/bob/pad/base/tools/FileSelector.py @@ -107,9 +107,18 @@ class FileSelector: # List of files that will be used for all files def original_data_list(self, groups=None): - """Returns the tuple of lists of original (real, attack) data that can be used for preprocessing.""" + """Returns the joint list of original (real and attack) file names.""" return self.database.original_file_names(self.database.all_files(groups=groups)) + def original_data_list_files(self, groups=None): + """Returns the joint list of original (real and attack) data files that can be used for preprocessing.""" + files = self.database.all_files(groups=groups) + if len(files) != 2: + fileset = files + else: + fileset = files[0]+files[1] + return fileset, self.database.original_directory, self.database.original_extension + def preprocessed_data_list(self, groups=None): """Returns the tuple of lists (real, attacks) of preprocessed data files.""" return self.get_paths(self.database.all_files(groups=groups), "preprocessed") diff --git a/bob/pad/base/tools/command_line.py b/bob/pad/base/tools/command_line.py index be370f2..ebdb2b4 100644 
--- a/bob/pad/base/tools/command_line.py +++ b/bob/pad/base/tools/command_line.py @@ -26,6 +26,8 @@ import bob.core logger = bob.core.log.setup("bob.pad.base") +from bob.pad.base.database import PadDatabase + from bob.bio.base import utils from . import FileSelector from .. import database @@ -264,7 +266,7 @@ def initialize(parsers, command_line_parameters=None, skips=[]): projector_sub_dir = extractor_sub_dir # Database directories, which should be automatically replaced - if isinstance(args.database, database.DatabaseBobSpoof): + if isinstance(args.database, PadDatabase): args.database.replace_directories(args.database_directories_file) # initialize the file selector diff --git a/bob/pad/base/tools/preprocessor.py b/bob/pad/base/tools/preprocessor.py index d3a1927..c4576e4 100644 --- a/bob/pad/base/tools/preprocessor.py +++ b/bob/pad/base/tools/preprocessor.py @@ -55,8 +55,10 @@ def preprocess(preprocessor, groups=None, indices=None, force=False): fs = FileSelector.instance() # get the file lists - data_files = fs.original_data_list(groups=groups) + data_files, original_directory, original_extension = fs.original_data_list_files(groups=groups) preprocessed_data_files = fs.preprocessed_data_list(groups=groups) + print("len of data files: %s" %(str(len(data_files)))) + print("len of preprocessed data files (paths): %s" %(str(len(preprocessed_data_files)))) # select a subset of keys to iterate if indices is not None: @@ -71,18 +73,23 @@ def preprocess(preprocessor, groups=None, indices=None, force=False): # iterate over the selected files for i in index_range: preprocessed_data_file = str(preprocessed_data_files[i]) + file_object = data_files[i] + file_name = file_object.make_path(original_directory, original_extension) # check for existence if not utils.check_file(preprocessed_data_file, force, 1000): - file_name = data_files[i] - data = preprocessor.read_original_data(file_name) + logger.info("... 
Processing original data file '%s'", file_name) + data = preprocessor.read_original_data(file_object, original_directory, original_extension) + # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere) + bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file)) # call the preprocessor - logger.info("- Preprocessor: processing file: %s", file_name) preprocessed_data = preprocessor(data, None) + if preprocessed_data is None: + logger.error("Preprocessing of file '%s' was not successful", file_name) + continue # write the data - bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file)) preprocessor.write_data(preprocessed_data, preprocessed_data_file) diff --git a/buildout.cfg b/buildout.cfg index 7ff85b3..0b85449 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -1,31 +1,13 @@ ; vim: set fileencoding=utf-8 : -; Pavel Korshunov <Pavel.Korshunov@idiap.ch> -; Wed 19 Aug 13:43:22 2015 +; Tue 16 Aug 15:00:20 CEST 2016 [buildout] parts = scripts +develop = . eggs = bob.pad.base - gridtk - extensions = bob.buildout - mr.developer -auto-checkout = * -develop = src/bob.db.base - src/bob.bio.base - src/bob.bio.db - src/bob.pad.db - . - -; options for bob.buildout -debug = true -verbose = true newest = false - -[sources] -bob.db.base = git branch=refactoring_2016 git@github.com:bioidiap/bob.db.base.git -bob.bio.base = git https://github.com/bioidiap/bob.bio.base -bob.bio.db = git git@gitlab.idiap.ch:biometric/bob.bio.db.git -bob.pad.db = git git@gitlab.idiap.ch:biometric/bob.pad.db.git +verbose = true [scripts] recipe = bob.buildout:scripts diff --git a/requirements.txt b/requirements.txt index 2d0335a..b1790d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ setuptools bob.extension -bob.io.base bob.db.base bob.bio.base -- GitLab