Commit 749c6799 authored by Pavel KORSHUNOV's avatar Pavel KORSHUNOV

Update the existing files to new DB interface

parent 3312b848
Pipeline #3822 failed with stages
in 38 minutes and 56 seconds
include README.rst bootstrap-buildout.py buildout.cfg COPYING version.txt requirements.txt
include README.rst bootstrap-buildout.py buildout.cfg develop.cfg version.txt requirements.txt
recursive-include doc *.py *.rst
recursive-include bob/bio/base/test/data *-dev*
recursive-include bob *.txt *.hdf5
recursive-include bob *.sql3
from . import database
from . import algorithm
from . import tools
#from . import grid # only one file, not complete directory
from . import script
from . import test
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Pavel Korshunov <pavel.korshunov@idiap.ch>
# @date: Wed 19 Aug 13:43:21 2015
#
# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from bob.bio.base.database.Database import Database
import os
import antispoofing.utils.db
class DatabaseBobSpoof(Database):
    """This class can be used whenever you have a database that follows the Bob
    antispoofing database interface, which is defined in
    :py:class:`antispoofing.utils.db.Database`

    **Parameters:**

    database : derivative of :py:class:`antispoofing.utils.db.Database`
        The database instance that provides the actual interface, see
        :ref:`antispoofing_databases` for a list.

    all_files_options : dict
        Dictionary of options passed to the
        :py:meth:`antispoofing.utils.db.Database.objects` database query when
        retrieving all data.  Defaults to an empty dictionary.

    original_directory : str or ``None``
        The directory where the data files are located; when ``None``, the
        wrapped database's own ``original_directory`` is used.

    check_original_files_for_existence : bool
        Enables to test for the original data files when querying the database.

    kwargs : ``key=value`` pairs
        The arguments of the :py:class:`Database` base class constructor.

    .. note:: Usually, the ``name``, ``protocol`` keyword parameters of the
       base class constructor need to be specified.
    """

    def __init__(
            self,
            database,  # the bob antispoofing database that is wrapped
            all_files_options=None,  # additional options for the database query used to extract all files
            original_directory=None,  # the directory where the data files are located
            check_original_files_for_existence=False,
            **kwargs  # the default parameters of the base class
    ):
        Database.__init__(
            self,
            **kwargs
        )

        assert isinstance(database, antispoofing.utils.db.Database), \
            "Only databases derived from antispoofing.utils.db.Database are supported by this interface. " \
            "Please implement your own bob.bio.base.database.Database interface for anti-spoofing experiments."

        self.database = database
        # Fall back to the wrapped database's directory when none is given.
        if original_directory is None:
            self.original_directory = database.original_directory
        else:
            self.original_directory = original_directory

        # ``None`` sentinel instead of a mutable ``{}`` default: every
        # instance gets its own dictionary rather than one shared across calls.
        self.all_files_options = {} if all_files_options is None else all_files_options
        self.check_existence = check_original_files_for_existence

        # Kept for __str__() so the wrapper can report its construction args.
        self._kwargs = kwargs

    def set_protocol(self, protocol):
        """
        Sets the protocol for the database. The protocol can be specified via
        command line to spoof.py script with option -P

        :param protocol: name of the protocol
        :return: None
        """
        self.protocol = protocol
        # Forward the protocol to the wrapped antispoofing database as well.
        self.database.set_kwargs({'protocol': protocol})

    def __str__(self):
        """__str__() -> info

        This function returns all parameters of this class (and its derived class).

        **Returns:**

        info : str
            A string containing the full information of all parameters of this
            (and the derived) class.
        """
        params = ", ".join(["%s=%s" % (key, value) for key, value in self._kwargs.items()])
        params += ", original_directory=%s" % (self.original_directory)
        if self.all_files_options: params += ", all_files_options=%s" % self.all_files_options
        return "%s(%s)" % (str(self.__class__), params)

    def replace_directories(self, replacements=None):
        """This helper function replaces the ``original_directory`` of the database with
        the directory read from the given replacement file.

        This function is provided for convenience, so that the database
        configuration files do not need to be modified.
        Instead, this function uses the given dictionary of replacements to
        change the original directory.

        The given ``replacements`` can be of type ``dict``, including all
        replacements, or a file name (as a ``str``), in which case the file is
        read.  The structure of the file should be:

        .. code-block:: text

           # Comments starting with # and empty lines are ignored
           original/path/to/data = /path/to/your/data

        **Parameters:**

        replacements : dict or str
            A dictionary with replacements, or a name of a file to read the
            dictionary from.  If the file name does not exist, no directories
            are replaced.
        """
        if replacements is None:
            return
        if isinstance(replacements, str):
            if not os.path.exists(replacements):
                return
            # Open the database replacement file and read its content into a dict.
            with open(replacements) as f:
                replacements = {}
                for line in f:
                    if line.strip() and not line.startswith("#"):
                        splits = line.split("=")
                        assert len(splits) == 2
                        replacements[splits[0].strip()] = splits[1].strip()

        assert isinstance(replacements, dict)

        if self.original_directory in replacements:
            self.original_directory = replacements[self.original_directory]
            # Keep the wrapped database in sync with the replaced directory.
            self.database.original_directory = self.original_directory

    def all_files(self, groups=('train', 'dev', 'eval')):
        """all_files(groups=('train', 'dev', 'eval')) -> files

        Returns all files of the database, respecting the current protocol.

        **Parameters:**

        groups : some of ``('train', 'dev', 'eval')`` or ``None``
            The groups to get the data for.
            If ``None``, data for all groups is returned.

        **Returns:**

        files : [[:py:class:`antispoofing.utils.db.File`], [:py:class:`antispoofing.utils.db.File`]]
            The list ``[real, attack]`` of all files of the database.
        """
        # Bug fix: the docstring promises that ``None`` means "all groups",
        # but ``'train' in None`` would raise TypeError.
        if groups is None:
            groups = ('train', 'dev', 'eval')

        realset = []
        attackset = []
        if 'train' in groups:
            real, attack = self.database.get_train_data()
            realset += real
            attackset += attack
        if 'dev' in groups:
            real, attack = self.database.get_devel_data()
            realset += real
            attackset += attack
        if 'eval' in groups:
            real, attack = self.database.get_test_data()
            realset += real
            attackset += attack
        return [realset, attackset]

    def training_files(self, step=None, arrange_by_client=False):
        """training_files(step = None, arrange_by_client = False) -> files

        Returns all training File objects.

        **Parameters:**

        The parameters are not applicable in this version of anti-spoofing
        experiments; they are accepted for interface compatibility only.

        **Returns:**

        files : ([:py:class:`File`], [:py:class:`File`])
            The ``(real, attack)`` training data of the wrapped database.
        """
        return self.database.get_train_data()

    def original_file_names(self, files):
        """original_file_names(files) -> paths

        Returns the full paths of the real and attack data of the given File objects.

        **Parameters:**

        files : [[:py:class:`antispoofing.utils.db.File`], [:py:class:`antispoofing.utils.db.File`]]
            The list of lists (``[real, attack]``) of file objects to retrieve
            the original data file names for.

        **Returns:**

        paths : [str]
            The paths extracted for the concatenated real+attack files, in the
            preserved order.
        """
        realfiles = files[0]
        attackfiles = files[1]
        # NOTE(review): ``self.original_extension`` is never set in this class;
        # it is presumably provided by the ``Database`` base class -- confirm.
        realpaths = [f.make_path(directory=self.original_directory,
                                 extension=self.original_extension)
                     for f in realfiles]
        attackpaths = [f.make_path(directory=self.original_directory,
                                   extension=self.original_extension)
                       for f in attackfiles]
        return realpaths + attackpaths
from . import database
from . import database_sql
from . import preprocessor
from . import extractor
from . import algorithm
......@@ -21,8 +21,8 @@ import os
import sys
import six
from bob.pad.db import PadFile
from bob.pad.db import PadDatabase
from bob.pad.base.database import PadFile
from bob.pad.base.database import PadDatabase
import bob.io.base
from bob.db.base.driver import Interface as BaseInterface
......@@ -35,6 +35,11 @@ dummy_train_list = ['train_real', 'train_attack']
dummy_devel_list = ['dev_real', 'dev_attack']
dummy_test_list = ['eval_real', 'eval_attack']
dummy_data = {'train_real': 1.0, 'train_attack': 2.0,
'dev_real': 3.0, 'dev_attack': 4.0,
'eval_real': 5.0, 'eval_attack': 6.0}
class TestFile(PadFile):
def __init__(self, path, id):
attack_type = None
......@@ -42,6 +47,27 @@ class TestFile(PadFile):
attack_type = "attack"
PadFile.__init__(self, client_id=1, path=path, file_id=id, attack_type=attack_type)
def load(self, directory=None, extension='.hdf5'):
    """Return the dummy value recorded for this file.

    Builds the file path via ``self.make_path`` (falsy *directory* or
    *extension* are replaced by the empty string) and uses the path's
    base name as the key into the module-level ``dummy_data`` table.

    Keyword Parameters:

    directory
      [optional] If not empty or None, this directory is prefixed to the final
      file destination

    extension
      [optional] The extension of the filename - this will control the type of
      output and the codec for saving the input blob.
    """
    full_path = self.make_path(directory or '', extension or '')
    key = os.path.basename(full_path)
    return dummy_data[key]
def dumplist(args):
"""Dumps lists of files based on your criteria"""
......
......@@ -33,8 +33,5 @@ class DummyPreprocessor(Preprocessor):
"""Does nothing, simply converts the data type of the data, ignoring any annotation."""
return data
def read_original_data(self, original_file_name):
    """Return the dummy value keyed by the base name of *original_file_name*."""
    key = os.path.basename(original_file_name)
    return dummy_data[key]
preprocessor = DummyPreprocessor()
......@@ -107,9 +107,18 @@ class FileSelector:
# List of files that will be used for all files
def original_data_list(self, groups=None):
    """Return the joint list of original (real and attack) file names."""
    selected = self.database.all_files(groups=groups)
    return self.database.original_file_names(selected)
def original_data_list_files(self, groups=None):
    """Return ``(files, original_directory, original_extension)`` for preprocessing.

    ``all_files`` may yield either a flat list or a two-element
    ``[real, attack]`` pair; the pair is flattened into one joint list.
    """
    files = self.database.all_files(groups=groups)
    fileset = files[0] + files[1] if len(files) == 2 else files
    return fileset, self.database.original_directory, self.database.original_extension
def preprocessed_data_list(self, groups=None):
    """Return the tuple of lists (real, attacks) of preprocessed data files."""
    selected = self.database.all_files(groups=groups)
    return self.get_paths(selected, "preprocessed")
......
......@@ -26,6 +26,8 @@ import bob.core
logger = bob.core.log.setup("bob.pad.base")
from bob.pad.base.database import PadDatabase
from bob.bio.base import utils
from . import FileSelector
from .. import database
......@@ -264,7 +266,7 @@ def initialize(parsers, command_line_parameters=None, skips=[]):
projector_sub_dir = extractor_sub_dir
# Database directories, which should be automatically replaced
if isinstance(args.database, database.DatabaseBobSpoof):
if isinstance(args.database, PadDatabase):
args.database.replace_directories(args.database_directories_file)
# initialize the file selector
......
......@@ -55,8 +55,10 @@ def preprocess(preprocessor, groups=None, indices=None, force=False):
fs = FileSelector.instance()
# get the file lists
data_files = fs.original_data_list(groups=groups)
data_files, original_directory, original_extension = fs.original_data_list_files(groups=groups)
preprocessed_data_files = fs.preprocessed_data_list(groups=groups)
print("len of data files: %s" %(str(len(data_files))))
print("len of preprocessed data files (paths): %s" %(str(len(preprocessed_data_files))))
# select a subset of keys to iterate
if indices is not None:
......@@ -71,18 +73,23 @@ def preprocess(preprocessor, groups=None, indices=None, force=False):
# iterate over the selected files
for i in index_range:
preprocessed_data_file = str(preprocessed_data_files[i])
file_object = data_files[i]
file_name = file_object.make_path(original_directory, original_extension)
# check for existence
if not utils.check_file(preprocessed_data_file, force, 1000):
file_name = data_files[i]
data = preprocessor.read_original_data(file_name)
logger.info("... Processing original data file '%s'", file_name)
data = preprocessor.read_original_data(file_object, original_directory, original_extension)
# create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
# call the preprocessor
logger.info("- Preprocessor: processing file: %s", file_name)
preprocessed_data = preprocessor(data, None)
if preprocessed_data is None:
logger.error("Preprocessing of file '%s' was not successful", file_name)
continue
# write the data
bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
preprocessor.write_data(preprocessed_data, preprocessed_data_file)
......
; vim: set fileencoding=utf-8 :
; Pavel Korshunov <Pavel.Korshunov@idiap.ch>
; Wed 19 Aug 13:43:22 2015
; Tue 16 Aug 15:00:20 CEST 2016
[buildout]
parts = scripts
develop = .
eggs = bob.pad.base
gridtk
extensions = bob.buildout
mr.developer
auto-checkout = *
develop = src/bob.db.base
src/bob.bio.base
src/bob.bio.db
src/bob.pad.db
.
; options for bob.buildout
debug = true
verbose = true
newest = false
[sources]
bob.db.base = git branch=refactoring_2016 git@github.com:bioidiap/bob.db.base.git
bob.bio.base = git https://github.com/bioidiap/bob.bio.base
bob.bio.db = git git@gitlab.idiap.ch:biometric/bob.bio.db.git
bob.pad.db = git git@gitlab.idiap.ch:biometric/bob.pad.db.git
verbose = true
[scripts]
recipe = bob.buildout:scripts
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment