Commit e2c2bae1 authored by Manuel Günther's avatar Manuel Günther
Browse files

First version of (locally) running verify script and some dummy...

First version of (locally) running verify script and some dummy implementations of database, preprocessor, extractor and algorithm
parents
*~
*.swp
*.pyc
bin
eggs
parts
.installed.cfg
.mr.developer.cfg
*.egg-info
src
develop-eggs
sphinx
dist
This diff is collapsed.
include README.rst bootstrap-buildout.py buildout.cfg COPYING version.txt
recursive-include doc *.py *.rst
Example buildout environment
============================
This simple example demonstrates how to wrap Bob-based scripts in buildout
environments. This may be useful for homework assignments, tests, or as a way to
distribute code to reproduce your publication. In summary, if you need to give
out code to others, we recommend that you follow this template so that your code
can be tested, documented and run in an orderly fashion.
Installation
------------
.. note::
To follow these instructions locally you will need a local copy of this
package. For that, you can use the github tarball API to download the package::
$ wget --no-check-certificate https://github.com/idiap/bob.project.example/tarball/master -O- | tar xz
$ mv idiap-bob.project* bob.project.example
Documentation and Further Information
-------------------------------------
Please refer to the latest Bob user guide, accessible from the `Bob website
<http://idiap.github.com/bob/>`_, for how to create your own packages based on
this example. In particular, the section entitled `Organize Your Work in
Satellite Packages <http://www.idiap.ch/software/bob/docs/releases/last/sphinx/html/OrganizeYourCode.html>`_
contains details on how to set up, build and roll out your code.
# Declare this package as a setuptools (pkg_resources) namespace package.
# pkg_resources is reached through __import__ so that the statement stays a
# single expression and no additional name is bound in this module.
# see http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
__import__('pkg_resources').declare_namespace(__name__)
# Declare this package as a setuptools (pkg_resources) namespace package.
# pkg_resources is reached through __import__ so that the statement stays a
# single expression and no additional name is bound in this module.
# see http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
__import__('pkg_resources').declare_namespace(__name__)
from .utils import *
from . import database
from . import algorithm
from . import extractor
def get_config():
    """Return the configuration information of this package as a string.

    The work is delegated to ``bob.extension.get_config``, which is called with
    the name of this module.
    """
    # bob.extension is imported only when the function is actually called
    from bob.extension import get_config as _describe_package
    return _describe_package(__name__)
# gets sphinx autodoc done right - don't remove it
# Exports every name that is defined in this module at this point and that does
# not start with an underscore; names bound after this line are not included.
__all__ = [_ for _ in dir() if not _.startswith('_')]
from .Algorithm import Algorithm
class Database:
    """This class represents the basic API for database access.

    Please use this class as a base class for your database access classes.
    Do not forget to call the constructor of this base class in your derived class.

    The first group of methods are helper functions that are implemented here.
    The second group defines the interface; these methods raise
    ``NotImplementedError`` and have to be implemented in derived classes.
    """

    def __init__(
        self,
        name,
        original_directory = None,
        original_extension = None,
        annotation_directory = None,
        annotation_extension = '.pos',
        annotation_type = None,
        protocol = 'Default',
        training_depends_on_protocol = False,
        models_depend_on_protocol = False,
        **kwargs
    ):
        """
        Parameters to the constructor of the Database:

        name
          A unique name for the database.

        original_directory : str
          The directory where the original data of the database are stored.

        original_extension : str
          The file name extension of the original data.

        annotation_directory : str
          The directory where the image annotations of the database are stored, if any.

        annotation_extension : str
          The file name extension of the annotation files.

        annotation_type : str
          The type of the annotation file to read, see :py:func:`bob.db.verification.utils.read_annotation_file` for accepted formats.

        protocol : str
          The name of the protocol that defines the default experimental setup for this database.

        training_depends_on_protocol : bool
          Specifies, if the training set used for training the extractor and the projector depend on the protocol

        models_depend_on_protocol : bool
          Specifies, if the models depend on the protocol

        kwargs
          Ignored extra arguments.
        """
        self.name = name
        self.original_directory = original_directory
        self.original_extension = original_extension
        self.annotation_directory = annotation_directory
        self.annotation_extension = annotation_extension
        self.annotation_type = annotation_type
        self.protocol = protocol
        self.training_depends_on_protocol = training_depends_on_protocol
        self.models_depend_on_protocol = models_depend_on_protocol

    def __str__(self):
        """Returns a string containing all parameters of this class."""
        params = "name=%s, protocol=%s, original_directory=%s, original_extension=%s" % (self.name, self.protocol, self.original_directory, self.original_extension)
        # the annotation details are reported only when an annotation type is configured
        if self.annotation_type is not None:
            # the attribute has to be read from self; the bare name is not defined in this scope
            params += ", annotation_type=%s" % self.annotation_type
            if self.annotation_directory:
                params += ", annotation_directory=%s" % self.annotation_directory
            params += ", annotation_extension=%s" % self.annotation_extension
        params += ", training_depends_on_protocol=%s, models_depend_on_protocol=%s" % (self.training_depends_on_protocol, self.models_depend_on_protocol)
        return "%s(%s)" % (str(self.__class__), params)

    ###########################################################################
    ### Helper functions that you might want to use in derived classes
    ###########################################################################

    def sort(self, files):
        """Returns a sorted version of the given list of File's (or other structures that define an 'id' data member).

        The files are sorted with their own comparison operator, and entries
        whose id equals the id of their predecessor in the sorted list are removed.
        """
        # sort files using their sort function
        sorted_files = sorted(files)
        # remove duplicates: keep the first element and each element whose id differs from the previous one
        return [f for i, f in enumerate(sorted_files) if not i or sorted_files[i-1].id != f.id]

    def arrange_by_client(self, files):
        """Arranges the given list of files by client id.

        This function returns a list of lists of File's, one inner list per
        client, ordered by the sorted client ids.
        Inside each inner list, the files keep the order of the input.
        """
        client_files = {}
        for file in files:
            client_files.setdefault(file.client_id, []).append(file)
        return [client_files[client] for client in sorted(client_files)]

    def annotations(self, file):
        """Returns the annotations for the given File object, if available.

        ``None`` is returned when no annotation directory is configured, or when
        ``bob.db.verification.utils`` cannot be imported (an error is logged in that case).
        """
        if self.annotation_directory:
            try:
                # imported locally so that this method does not rely on a module level import
                import os
                import bob.db.verification.utils
                annotation_path = os.path.join(self.annotation_directory, file.path + self.annotation_extension)
                return bob.db.verification.utils.read_annotation_file(annotation_path, self.annotation_type)
            except ImportError as e:
                from .. import utils
                utils.error("Cannot import bob.db.verification.utils: '%s'. No annotation is read." % e)
        return None

    def uses_probe_file_sets(self):
        """Defines if, for the current protocol, the database uses several probe files to generate a score.

        By default, False is returned. Overwrite the default if you need different behavior.
        """
        return False

    def file_names(self, files, directory, extension):
        """Returns the full path of the given File objects.

        When the database uses probe file sets and the first element provides a
        ``files`` attribute, a list of lists of paths (one list per file set) is returned.
        Otherwise, a flat list of paths is returned.
        The paths are generated by the ``make_path`` function of the file objects.
        """
        if self.uses_probe_file_sets() and files and hasattr(files[0], 'files'):
            # List of Filesets: do not remove duplicates
            return [[f.make_path(directory, extension) for f in file_set.files] for file_set in files]
        # List of files, do not remove duplicates
        return [f.make_path(directory, extension) for f in files]

    def original_file_names(self, files):
        """Returns the full path of the original data of the given File objects.

        Requires both ``original_directory`` and ``original_extension`` to be set.
        """
        assert self.original_directory is not None
        assert self.original_extension is not None
        return self.file_names(files, self.original_directory, self.original_extension)

    ###########################################################################
    ### Interface functions that you need to implement in your class.
    ###########################################################################

    def all_files(self, groups = None):
        """Returns all files of the database"""
        raise NotImplementedError("Please implement this function in derived classes")

    def training_files(self, step = None, arrange_by_client = False):
        """Returns all training File objects for the given step (might be 'train_extractor', 'train_projector', 'train_enroller' or None), and arranges them by client, if desired"""
        raise NotImplementedError("Please implement this function in derived classes")

    def model_ids(self, group = 'dev'):
        """Returns a list of model ids for the given group"""
        raise NotImplementedError("Please implement this function in derived classes")

    def client_id_from_model_id(self, model_id, group = 'dev'):
        """Returns the client id for the given model id"""
        raise NotImplementedError("Please implement this function in derived classes")

    def enroll_files(self, model_id, group = 'dev'):
        """Returns a list of enrollment File objects for the given model id and the given group"""
        raise NotImplementedError("Please implement this function in derived classes")

    def probe_files(self, model_id = None, group = 'dev'):
        """Returns a list of probe File object in a specific format that should be compared with the model belonging to the given model id of the specified group"""
        raise NotImplementedError("Please implement this function in derived classes")

    def probe_file_sets(self, model_id = None, group = 'dev'):
        """Returns a list of probe FileSet object in a specific format that should be compared with the model belonging to the given model id of the specified group"""
        raise NotImplementedError("Please implement this function in derived classes")
class DatabaseZT (Database):
    """Extension of the Database API with the functions required for ZT score normalization.

    During construction, please call the constructor of the base class 'Database' directly.
    Apart from :py:meth:`client_id_from_t_model_id`, all functions of this class
    raise ``NotImplementedError`` and need to be implemented in derived classes.
    """

    def t_model_ids(self, group = 'dev'):
        """Lists the T-Norm model ids of the given group."""
        raise NotImplementedError("Please implement this function in derived classes")

    def client_id_from_t_model_id(self, t_model_id, group = 'dev'):
        """Provides the client id that belongs to the given T-model id.

        This base class implementation simply forwards to :py:meth:`client_id_from_model_id`.
        Overload this function if you need another behavior.
        """
        client_id = self.client_id_from_model_id(t_model_id, group)
        return client_id

    def t_enroll_files(self, model_id, group = 'dev'):
        """Lists the enrollment files for the given T-Norm model id and the given group."""
        raise NotImplementedError("Please implement this function in derived classes")

    def z_probe_files(self, model_id = None, group = 'dev'):
        """Lists the Z-probe objects, in a specific format, that should be compared with the model belonging to the given model id of the specified group."""
        raise NotImplementedError("Please implement this function in derived classes")

    def z_probe_file_sets(self, model_id = None, group = 'dev'):
        """Lists the Z-probe FileSet objects, in a specific format, that should be compared with the model belonging to the given model id of the specified group."""
        raise NotImplementedError("Please implement this function in derived classes")
from .Database import Database, DatabaseZT
class DatabaseBob (Database):
    """This class can be used whenever you have a database that follows the default Bob database interface.

    All queries are forwarded to the wrapped ``database`` object, using the
    protocol that is stored in this object.
    """

    def __init__(
        self,
        database,                          # The bob database that is used
        all_files_options = None,          # additional options for the database query that can be used to extract all files
        extractor_training_options = None, # additional options for the database query that can be used to extract the training files for the extractor training
        projector_training_options = None, # additional options for the database query that can be used to extract the training files for the projector training
        enroller_training_options = None,  # additional options for the database query that can be used to extract the training files for the enroller training
        check_original_files_for_existence = False,
        **kwargs                           # The default parameters of the base class
    ):
        """
        Parameters of the constructor of this database:

        database : derivative of :py:class:`bob.db.verification.utils.Database`
          the bob.db.___ database that provides the actual interface, see :ref:`verification_databases` for a list.

        all_files_options : dict or None
          Options passed to the database query used to retrieve all data. ``None`` (the default) means no options.

        extractor_training_options : dict or None
          Options passed to the database query used to retrieve the images for the extractor training. ``None`` (the default) means no options.

        projector_training_options : dict or None
          Options passed to the database query used to retrieve the images for the projector training. ``None`` (the default) means no options.

        enroller_training_options : dict or None
          Options passed to the database query used to retrieve the images for the enroller training. ``None`` (the default) means no options.

        check_original_files_for_existence
          Enables the test for the original data files when querying the database.

        kwargs
          The arguments of the base class
        """
        # explicit base class call, as the base class is not required to support super()
        Database.__init__(
            self,
            **kwargs
        )

        self.database = database
        # the original directory is always taken from the wrapped database
        self.original_directory = database.original_directory

        # every instance gets its own option dictionaries;
        # a dictionary used as default argument would be shared between all instances
        self.all_files_options = {} if all_files_options is None else all_files_options
        self.extractor_training_options = {} if extractor_training_options is None else extractor_training_options
        self.projector_training_options = {} if projector_training_options is None else projector_training_options
        self.enroller_training_options = {} if enroller_training_options is None else enroller_training_options
        self.check_existence = check_original_files_for_existence

        # remembered only to be listed in __str__
        self._kwargs = kwargs

    def __str__(self):
        """This function returns a string containing all parameters of this class (and its derived class)."""
        params = ", ".join(["%s=%s" % (key, value) for key, value in self._kwargs.items()])
        params += ", original_directory=%s, original_extension=%s" % (self.original_directory, self.original_extension)
        # option dictionaries are listed only when they are not empty
        if self.all_files_options: params += ", all_files_options=%s"%self.all_files_options
        if self.extractor_training_options: params += ", extractor_training_options=%s"%self.extractor_training_options
        if self.projector_training_options: params += ", projector_training_options=%s"%self.projector_training_options
        if self.enroller_training_options: params += ", enroller_training_options=%s"%self.enroller_training_options

        return "%s(%s)" % (str(self.__class__), params)

    def uses_probe_file_sets(self):
        """Defines if, for the current protocol, the database uses several probe files to generate a score."""
        return self.protocol != 'None' and self.database.provides_file_set_for_protocol(self.protocol)

    def all_files(self, groups = None):
        """Returns all File objects of the database for the current protocol. If the current protocol is 'None' (a string), None (NoneType) will be used instead"""
        files = self.database.objects(protocol = self.protocol if self.protocol != 'None' else None, groups = groups, **self.all_files_options)
        return self.sort(files)

    def training_files(self, step = None, arrange_by_client = False):
        """Returns all training File objects (group 'world') of the database for the current protocol.

        ``step`` selects the query options: None uses the ``all_files_options``,
        while 'train_extractor', 'train_projector' and 'train_enroller' use the
        corresponding training options. Any other value raises a ``ValueError``.
        If ``arrange_by_client`` is set, a list of lists (one per client) is returned.
        """
        if step is None:
            training_options = self.all_files_options
        elif step == 'train_extractor':
            training_options = self.extractor_training_options
        elif step == 'train_projector':
            training_options = self.projector_training_options
        elif step == 'train_enroller':
            training_options = self.enroller_training_options
        else:
            raise ValueError("The given step '%s' must be one of ('train_extractor', 'train_projector', 'train_enroller')" % step)

        files = self.sort(self.database.objects(protocol = self.protocol, groups = 'world', **training_options))
        if arrange_by_client:
            return self.arrange_by_client(files)
        return files

    def test_files(self, groups = ['dev']):
        """Returns the test files (i.e., enrollment and probe files) for the given groups."""
        # the default list is only read by this method, never modified
        return self.sort(self.database.test_files(protocol = self.protocol, groups = groups, **self.all_files_options))

    def model_ids(self, group = 'dev'):
        """Returns the sorted model ids for the given group and the current protocol."""
        if hasattr(self.database, 'model_ids'):
            return sorted(self.database.model_ids(protocol = self.protocol, groups = group))
        # databases without 'model_ids' function: collect the ids of their models
        return sorted([model.id for model in self.database.models(protocol = self.protocol, groups = group)])

    def client_id_from_model_id(self, model_id, group = 'dev'):
        """Returns the client id for the given model id.

        If the wrapped database does not provide 'get_client_id_from_model_id',
        the model id itself is returned.
        """
        if hasattr(self.database, 'get_client_id_from_model_id'):
            return self.database.get_client_id_from_model_id(model_id)
        return model_id

    def enroll_files(self, model_id, group = 'dev'):
        """Returns the list of enrollment File objects for the given model id."""
        files = self.database.objects(protocol = self.protocol, groups = group, model_ids = (model_id,), purposes = 'enroll', **self.all_files_options)
        return self.sort(files)

    def probe_files(self, model_id = None, group = 'dev'):
        """Returns the list of probe File objects (for the given model id, if given)."""
        # compare with None, so that a model id of 0 still restricts the query
        if model_id is not None:
            files = self.database.objects(protocol = self.protocol, groups = group, model_ids = (model_id,), purposes = 'probe', **self.all_files_options)
        else:
            files = self.database.objects(protocol = self.protocol, groups = group, purposes = 'probe', **self.all_files_options)
        return self.sort(files)

    def probe_file_sets(self, model_id = None, group = 'dev'):
        """Returns the list of probe FileSet objects (for the given model id, if given)."""
        # compare with None, so that a model id of 0 still restricts the query
        if model_id is not None:
            file_sets = self.database.object_sets(protocol = self.protocol, groups = group, model_ids = (model_id,), purposes = 'probe', **self.all_files_options)
        else:
            file_sets = self.database.object_sets(protocol = self.protocol, groups = group, purposes = 'probe', **self.all_files_options)
        return self.sort(file_sets)

    def annotations(self, file):
        """Returns the annotations for the given File object, as provided by the wrapped database."""
        return self.database.annotations(file)

    def original_file_names(self, files):
        """Returns the full path of the original data of the given File objects, as provided by the wrapped database."""
        return self.database.original_file_names(files, self.check_existence)
class DatabaseBobZT (DatabaseBob, DatabaseZT):
    """This class can be used whenever you have a database that follows the default Bob database interface defining file lists for ZT score normalization."""

    def __init__(
        self,
        z_probe_options = None, # Limit the z-probes
        **kwargs
    ):
        """
        Parameters of the constructor of this database:

        z_probe_options : dict or None
          Options passed to the database queries for the Z-probe files. ``None`` (the default) means no options.

        kwargs
          The arguments of the :py:class:`DatabaseBob` base class constructor.
        """
        # every instance gets its own dictionary;
        # a dictionary used as default argument would be shared between all instances
        if z_probe_options is None:
            z_probe_options = {}
        # call base class constructor, passing all the parameters to it
        DatabaseBob.__init__(self, z_probe_options = z_probe_options, **kwargs)

        self.m_z_probe_options = z_probe_options

    def all_files(self, groups = ['dev']):
        """Returns all File objects of the database for the current protocol, including the T-Norm and Z-Norm files of all given groups except 'world'.

        If the current protocol is 'None' (a string), None (NoneType) will be used instead.
        """
        # NOTE(review): unlike DatabaseBob.all_files, groups = None is not supported here, as the groups are iterated below
        protocol = self.protocol if self.protocol != 'None' else None
        # work on a copy, so that the list returned by the database is not extended in place
        files = list(self.database.objects(protocol = protocol, groups = groups, **self.all_files_options))

        # add all files that belong to the ZT-norm
        for group in groups:
            if group == 'world': continue
            files += self.database.tobjects(protocol = protocol, groups = group, model_ids = None)
            files += self.database.zobjects(protocol = protocol, groups = group, **self.m_z_probe_options)
        return self.sort(files)

    def t_model_ids(self, group = 'dev'):
        """Returns the sorted T-Norm model ids for the given group and the current protocol."""
        if hasattr(self.database, 'tmodel_ids'):
            return sorted(self.database.tmodel_ids(protocol = self.protocol, groups = group))
        # databases without 'tmodel_ids' function: collect the ids of their T-Norm models
        return sorted([model.id for model in self.database.tmodels(protocol = self.protocol, groups = group)])

    def t_enroll_files(self, model_id, group = 'dev'):
        """Returns the list of enrollment File objects for the given T-Norm model id."""
        files = self.database.tobjects(protocol = self.protocol, groups = group, model_ids = (model_id,))
        return self.sort(files)

    def z_probe_files(self, group = 'dev'):
        """Returns the list of Z-probe File objects."""
        files = self.database.zobjects(protocol = self.protocol, groups = group, **self.m_z_probe_options)
        return self.sort(files)

    def z_probe_file_sets(self, group = 'dev'):
        """Returns the list of Z-probe Fileset objects."""
        file_sets = self.database.zobject_sets(protocol = self.protocol, groups = group, **self.m_z_probe_options)
        return self.sort(file_sets)
from .Database import Database, DatabaseZT
from .DatabaseBob import DatabaseBob, DatabaseBobZT
# gets sphinx autodoc done right - don't remove it
# Exports every name that is defined in this module at this point and that does
# not start with an underscore; names bound after this line are not included.
__all__ = [_ for _ in dir() if not _.startswith('_')]
class File:
    """Minimum interface of a file that needs to be exported.

    The constructor stores three data members:

    id
      The **unique** id of the file (taken from ``file_id``).

    client_id
      The id of the client that is attached to the file.

    path
      The **relative** path of the file according to the base directory of the database, without file extension.
    """

    def __init__(self, file_id, client_id, path):
        self.id, self.client_id, self.path = file_id, client_id, path

    def __lt__(self, other):
        """Orders two File objects according to their ids."""
        own_id, other_id = self.id, other.id
        return own_id < other_id
class FileSet:
    """Minimum interface of a file set that needs to be exported.

    The constructor stores four data members:

    id
      The **unique** id of the file set (taken from ``file_set_id``).

    client_id
      The id of the client that is attached to the file set.

    path
      A name of the file set (taken from ``file_set_name``).

    files
      The list of files contained in this set; it starts empty.
    """

    def __init__(self, file_set_id, client_id, file_set_name):
        self.id, self.client_id, self.path = file_set_id, client_id, file_set_name
        # a new list per instance, to be filled after construction
        self.files = list()

    def __lt__(self, other):
        """Orders two FileSet objects according to their ids."""
        own_id, other_id = self.id, other.id
        return own_id < other_id
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Tue Oct 2 12:12:39 CEST 2012
#
# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
from .. import utils
class Extractor:
"""This is the base class for all feature extractors.
It defines the minimum requirements that a derived feature extractor class need to implement.
The constructor takes two parameters:
requires_training : bool
Set this flag to ``True`` if your feature extractor needs to be trained.
In that case, please override the :py:meth:`train` and :py:meth:`load` methods
split_training_data_by_client : bool
Set this flag to ``True`` if your feature extractor requires the training data to be split by clients.
Ignored, if ``requires_training`` is ``False``
"""
def __init__(
self,
requires_training = False, # enable, if your extractor needs training
split_training_data_by_client = False, # enable, if your extractor needs the training files sorted by client
**kwargs # the parameters of the extractor, to be written in the __str__() method
):
# Each class needs to have a constructor taking
# all the parameters that are required for the feature extraction as arguments
self.requires_training = requires_training
self.split_training_data_by_client = split_training_data_by_client
self._kwargs = kwargs
############################################################
### functions that must be overwritten in derived classes
############################################################
def __call__(self, data):
"""This function will actually perform the feature extraction.
It must be overwritten by derived classes.
It takes the (preprocessed) data and returns the features extracted from the data.
"""
raise NotImplementedError("Please overwrite this function in your derived class")
def __str__(self):
"""This function returns a string containing all parameters of this class (and its derived class)."""