Commit 21643d35 authored by MGB's avatar MGB

first commit

parents
include README.rst bootstrap-buildout.py buildout.cfg COPYING requirements.txt version.txt
recursive-include doc *.py *.rst
recursive-include bob *.sql3
.. image:: http://img.shields.io/badge/docs-stable-yellow.png
:target: http://pythonhosted.org/bob.db.KBOC16/index.html
.. image:: http://img.shields.io/badge/docs-latest-orange.png
:target: https://www.idiap.ch/software/bob/docs/latest/bioidiap/bob.db.atvskeystroke/master/index.html
.. image:: https://travis-ci.org/bioidiap/bob.db.KBOC16.svg?branch=master
:target: https://travis-ci.org/bioidiap/bob.db.KBOC16
.. image:: https://coveralls.io/repos/bioidiap/bob.db.KBOC16/badge.svg
:target: https://coveralls.io/r/bioidiap/bob.db.KBOC16
.. image:: https://img.shields.io/badge/github-master-0000c0.png
:target: https://github.com/bioidiap/bob.db.KBOC16/tree/master
.. image:: http://img.shields.io/pypi/v/bob.db.KBOC16.png
:target: https://pypi.python.org/pypi/bob.db.KBOC16
.. image:: http://img.shields.io/pypi/dm/bob.db.KBOC16.png
:target: https://pypi.python.org/pypi/bob.db.KBOC16
==================
KBOC16 Database
==================
This package contains the access API and descriptions for the `ATVS Keystroke
Database`_.
You would normally not install this package unless you are maintaining it. What
you would do instead is to tie it in at the package you need to **use** it.
There are a few ways to achieve this:
1. You can add this package as a requirement at the ``setup.py`` for your own
`satellite package
<https://github.com/idiap/bob/wiki/Virtual-Work-Environments-with-Buildout>`_
or to your Buildout ``.cfg`` file, if you prefer it that way. With this
method, this package gets automatically downloaded and installed on your
working environment, or
2. You can manually download and install this package using commands like
``easy_install`` or ``pip``.
The package is available in two different distribution formats:
1. You can download it from `PyPI <http://pypi.python.org/pypi>`_, or
2. You can download it in its source form from `its git repository
<https://github.com/mgbarrero/bob.db.KBOC16>`_. When you download the
version at the git repository, you will need to run a command to recreate
the backend SQLite file required for its operation. This means that the
database raw files must be installed somewhere in this case. With option
``1`` you can run in `dummy` mode and only download the raw data files for
the database once you are happy with your setup.
You can mix and match points 1/2 above based on your requirements. Here are
some examples:
Modify your setup.py and download from PyPI
===========================================
That is the easiest. Edit your ``setup.py`` in your satellite package and add
the following entry in the ``install_requires`` section (note: ``...`` means
`whatever extra stuff you may have in-between`, don't put that on your
script)::
install_requires=[
...
"bob.db.KBOC16",
],
Proceed normally with your ``bootstrap/buildout`` steps and you should be all
set. That means you can now import the ``bob.db.KBOC16`` namespace into your scripts.
Modify your buildout.cfg and download from git
==============================================
You will need to add a dependency on `mr.developer
<http://pypi.python.org/pypi/mr.developer/>`_ to be able to install from our
git repositories. Your ``buildout.cfg`` file should contain the following
lines::
[buildout]
...
extensions =mr.developer
auto-checkout = *
eggs = ...
bob.db.KBOC16
[sources]
bob.db.KBOC16 = git https://github.com/mgbarrero/bob.db.KBOC16.git
...
# Extend this package's search path pkgutil-style, so that portions of the
# same package living in other directories on sys.path are importable too.
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""The ATVSKeystroke database
"""
from .query import Database
from .models import Client, File, Protocol, ProtocolPurpose
def get_config():
    """Return the configuration information of this package as a string.

    The work is delegated to ``bob.extension.get_config``, which is handed the
    name of this module.
    """
    # Imported lazily, exactly as before, so importing this package does not
    # require bob.extension until the information is actually requested.
    from bob.extension import get_config as _describe_package
    return _describe_package(__name__)
# gets sphinx autodoc done right - don't remove it
# Exports every name currently defined in this module that does not start
# with an underscore.
__all__ = [_ for _ in dir() if not _.startswith('_')]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""This script creates the KBOC16 database in a single pass.
"""
import os,string
from .models import *
# clients: identifiers 1 to 299 inclusive (range() excludes its upper bound)
userid_clients = range(1, 300)
def nodot(item):
    """Tell whether ``item`` is *not* a hidden entry.

    An entry counts as hidden when its first element is the ``.`` character.
    Indexing an empty ``item`` raises ``IndexError``.
    """
    leading = item[0]
    return not (leading == '.')
def add_clients(session, verbose):
    """Add clients to the KBOC16 database.

    One ``Client`` row is added to ``session`` for every identifier listed in
    the module-level ``userid_clients``.

    Parameters:
      session -- database session the new ``Client`` objects are added to.
      verbose -- verbosity counter; a message per client is printed when it is
                 greater than 1. ``None`` is treated as 0.
    """
    # argparse's 'count' action yields None when the flag is never given
    verbose = verbose or 0
    # The loop variable used to be spelled ``cdid`` while the body used
    # ``cid``, which raised NameError on the first iteration.
    for cid in userid_clients:
        if verbose > 1:
            print(" Adding user '%s'" % (cid))
        session.add(Client(cid))
def add_files(session, imagedir, verbose):
    """Add files to the KBOC16 database.

    Every entry of ``imagedir`` whose extension equals ``db_file_extension``
    is parsed and registered as a ``File``. The stem is split on ``_``: the
    last three characters of the first part give the user id and the second
    part gives the shot number. Shots 1-4 are stored as session 1; higher shot
    numbers are stored as session 2 with 4 subtracted from the shot number.

    Parameters:
      session  -- database session the new ``File`` objects are added to.
      imagedir -- directory that is listed to discover the raw files.
      verbose  -- verbosity counter; a message per file is printed when it is
                  greater than 1. ``None`` is treated as 0.

    Raises ``ValueError``/``IndexError`` if a matching file name does not
    follow the expected ``<...NNN>_<shot>`` layout.
    """
    # argparse's 'count' action yields None when the flag is never given
    verbose = verbose or 0
    for filename in os.listdir(imagedir):
        basename, extension = os.path.splitext(filename)
        if extension != db_file_extension:
            continue
        if verbose > 1:
            print(" Adding file '%s'..." % (basename))
        # str.split works on Python 2 and 3; string.split() is Python 2 only
        parts = basename.split("_")
        shotid = int(parts[1])
        userid = int(parts[0][-3:])
        if shotid <= 4:
            sessionid = 1
        else:
            sessionid = 2
            shotid = shotid - 4
        session.add(File(userid, basename, sessionid, shotid))
def add_protocols(session, verbose):
    """Adds protocols

    Creates protocol ``'A'`` with two purposes in group ``'eval'``: ``'enrol'``
    is linked to every file of session 1 and ``'probe'`` to every file of
    session 2.

    Parameters:
      session -- database session used to add and query objects.
      verbose -- verbosity counter; protocol names are printed when it is
                 non-zero, purposes and files when it is greater than 1.
                 ``None`` is treated as 0.
    """
    # argparse's 'count' action yields None when the flag is never given
    verbose = verbose or 0

    # 1. DEFINITIONS
    enroll_session = [1]
    client_probe_session = [2]
    protocols = ['A']

    # 2. ADDITIONS TO THE SQL DATABASE
    # (group, purpose, session ids whose files belong to that purpose)
    purpose_definitions = [
        ('eval', 'enrol', enroll_session),
        ('eval', 'probe', client_probe_session),
    ]
    for proto in protocols:
        p = Protocol(proto)
        # Add protocol
        if verbose:
            print("Adding protocol %s..." % (proto))
        session.add(p)
        # flush + refresh so that the generated primary key ``p.id`` is set
        session.flush()
        session.refresh(p)

        # Add protocol purposes
        for group, purpose, session_ids in purpose_definitions:
            pu = ProtocolPurpose(p.id, group, purpose)
            if verbose > 1:
                print(" Adding protocol purpose ('%s','%s')..." % (group, purpose))
            session.add(pu)
            session.flush()
            session.refresh(pu)

            # Add files attached with this protocol purpose
            q = session.query(File).join(Client).filter(File.session_id.in_(session_ids))
            for k in q:
                if verbose > 1:
                    print(" Adding protocol file '%s'..." % (k.path))
                pu.files.append(k)
def create_tables(args):
    """Creates all necessary tables (only to be used at the first time)"""
    # ``args`` supplies: ``type`` (database type), ``files`` (its first entry
    # is the database file) and ``verbose`` (SQL echo is on when it exceeds 2).
    from bob.db.base.utils import create_engine_try_nolock

    database_type = args.type
    database_file = args.files[0]
    echo_sql = args.verbose > 2
    engine = create_engine_try_nolock(database_type, database_file, echo=echo_sql)
    # Emit CREATE TABLE for everything registered on the declarative Base
    Base.metadata.create_all(engine)
# Driver API
# ==========
def create(args):
    """Creates or re-creates this database"""
    # The docstring above is reused verbatim as the command-line help text of
    # the 'create' sub-command, so it is kept short.
    #
    # ``args`` supplies: ``files`` (first entry is the database file),
    # ``recreate`` (erase an existing file first), ``verbose`` (verbosity
    # counter), ``type`` (database type) and ``imagedir`` (raw data directory).
    from bob.db.base.utils import session_try_nolock

    dbfile = args.files[0]

    if args.recreate and os.path.exists(dbfile):
        if args.verbose:
            print('unlinking %s...' % dbfile)
        os.unlink(dbfile)

    # dirname() is '' for a bare file name; os.makedirs('') would raise
    dbdir = os.path.dirname(dbfile)
    if dbdir and not os.path.exists(dbdir):
        os.makedirs(dbdir)

    # the real work...
    create_tables(args)
    s = session_try_nolock(args.type, dbfile, echo=(args.verbose > 2))
    try:
        add_clients(s, args.verbose)
        add_files(s, args.imagedir, args.verbose)
        add_protocols(s, args.verbose)
        s.commit()
    finally:
        # release the session even when one of the steps above fails
        s.close()
def add_command(subparsers):
    """Add specific subcommands that the action "create" can use

    Registers the ``create`` sub-command on ``subparsers`` with the options
    ``-R/--recreate``, ``-v/--verbose`` and ``-D/--imagedir``, and binds the
    ``create`` function as the action to run.
    """
    parser = subparsers.add_parser('create', help=create.__doc__)
    parser.add_argument('-R', '--recreate', action='store_true', help="If set, I'll first erase the current database")
    # default=0: without it argparse leaves ``verbose`` as None when -v is not
    # given, and comparisons such as ``args.verbose > 2`` raise TypeError on
    # Python 3.
    parser.add_argument('-v', '--verbose', action='count', default=0, help="Do SQL operations in a verbose way?")
    parser.add_argument('-D', '--imagedir', metavar='DIR', default='/home/bob/KBOC16data', help="Change the relative path to the directory containing the images of the KBOC16 database.")
    parser.set_defaults(func=create) #action
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Commands this database can respond to.
"""
import os
import sys
from bob.db.base.driver import Interface as BaseInterface
def dumplist(args):
    """Dumps lists of files based on your criteria"""
    # The docstring doubles as the sub-command help text; details live here.
    # Queries the database with the command-line filters and writes one full
    # path per matching file. Always returns 0.
    from .query import Database

    database = Database()
    filters = dict(
        protocol=args.protocol,
        purposes=args.purpose,
        model_ids=(args.client,),
        groups=args.group,
        classes=args.sclass,
    )
    selected = database.objects(**filters)

    # In self-test mode the listing is sent to a null sink instead of stdout
    if args.selftest:
        from bob.db.base.utils import null
        sink = null()
    else:
        sink = sys.stdout

    for entry in selected:
        sink.write('%s\n' % (entry.make_path(args.directory, args.extension),))

    return 0
def checkfiles(args):
    """Checks existence of files based on your criteria"""
    # The docstring doubles as the sub-command help text; details live here.
    # Every file known to the database is looked up on disk (directory and
    # extension come from ``args``); missing ones are reported. Always
    # returns 0.
    from .query import Database

    r = Database().objects()

    # go through all files, keep those that are missing on the filesystem
    missing = [
        f for f in r
        if not os.path.exists(f.make_path(args.directory, args.extension))
    ]

    # report (to a null sink in self-test mode)
    if args.selftest:
        from bob.db.base.utils import null
        sink = null()
    else:
        sink = sys.stdout

    if missing:
        for f in missing:
            sink.write('Cannot find file "%s"\n' % (f.make_path(args.directory, args.extension),))
        summary = (len(missing), len(r), args.directory)
        sink.write('%d files (out of %d) were not found at "%s"\n' % summary)

    return 0
def reverse(args):
    """Returns a list of file database identifiers given the path stems"""
    # The docstring doubles as the sub-command help text; details live here.
    # Writes one file id per line. Returns 1 when the lookup result is empty,
    # 0 otherwise.
    from .query import Database

    database = Database()

    # In self-test mode the output is sent to a null sink instead of stdout
    if args.selftest:
        from bob.db.base.utils import null
        sink = null()
    else:
        sink = sys.stdout

    matches = database.reverse(args.path)
    for entry in matches:
        sink.write('%d\n' % entry.id)

    return 0 if matches else 1
def path(args):
    """Returns a list of fully formed paths or stems given some file id"""
    # The docstring doubles as the sub-command help text; details live here.
    # Writes one path per line. Returns 1 when the lookup result is empty,
    # 0 otherwise.
    from .query import Database

    database = Database()

    # In self-test mode the output is sent to a null sink instead of stdout
    if args.selftest:
        from bob.db.base.utils import null
        sink = null()
    else:
        sink = sys.stdout

    resolved = database.paths(args.id, prefix=args.directory, suffix=args.extension)
    for resolved_path in resolved:
        sink.write('%s\n' % resolved_path)

    return 0 if resolved else 1
class Interface(BaseInterface):
    """Driver interface describing the KBOC16 database and its sub-commands."""

    def name(self):
        """Return the short name of this database."""
        return 'KBOC16'

    def version(self):
        """Return the installed version of the ``bob.db.<name>`` distribution."""
        import pkg_resources  # part of setuptools
        return pkg_resources.require('bob.db.%s' % self.name())[0].version

    def files(self):
        """Return the list of paths of the data files shipped with this package."""
        from pkg_resources import resource_filename
        raw_files = ('db.sql3',)
        return [resource_filename(__name__, k) for k in raw_files]

    def type(self):
        """Return the type of the database back-end."""
        return 'sqlite'

    def add_commands(self, parser):
        """Register the sub-commands of this database on ``parser``.

        The sub-commands are ``create`` (delegated to the ``create`` module),
        ``dumplist``, ``checkfiles``, ``reverse`` and ``path``. The ``choices``
        of several ``dumplist`` options are read from the database and are
        left empty when the database is not valid.
        """
        from . import __doc__ as docs

        subparsers = self.setup_parser(parser,
                                       "KBOC16 database", docs)

        # example: get the "create" action from a submodule
        from .create import add_command as create_command
        create_command(subparsers)

        from .query import Database
        import argparse
        db = Database()

        # example: get the "dumplist" action from a submodule
        parser = subparsers.add_parser('dumplist', help=dumplist.__doc__)
        parser.add_argument('-d', '--directory', default='', help="if given, this path will be prepended to every entry returned.")
        parser.add_argument('-e', '--extension', default='', help="if given, this extension will be appended to every entry returned.")
        parser.add_argument('-p', '--protocol', help="if given, limits the dump to a particular subset of the data that corresponds to the given protocol.", choices=db.protocol_names() if db.is_valid() else ())
        parser.add_argument('-u', '--purpose', help="if given, this value will limit the output files to those designed for the given purposes.", choices=db.purposes() if db.is_valid() else ())
        # The help text used to be a copy of the --group one; this option is
        # forwarded to the query as a model id.
        parser.add_argument('-C', '--client', type=int, help="if given, this value will limit the output files to those belonging to a particular client (model id).", choices=db.model_ids() if db.is_valid() else ())
        parser.add_argument('-g', '--group', help="if given, this value will limit the output files to those belonging to a particular protocolar group.", choices=db.groups() if db.is_valid() else ())
        parser.add_argument('-c', '--class', dest="sclass", help="if given, this value will limit the output files to those belonging to the given classes.", choices=('client', 'impostor', ''))
        parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
        parser.set_defaults(func=dumplist) #action

        # the "checkfiles" action
        parser = subparsers.add_parser('checkfiles', help=checkfiles.__doc__)
        parser.add_argument('-d', '--directory', default='', help="if given, this path will be prepended to every entry returned.")
        parser.add_argument('-e', '--extension', default='', help="if given, this extension will be appended to every entry returned.")
        parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
        parser.set_defaults(func=checkfiles) #action

        # adds the "reverse" command
        parser = subparsers.add_parser('reverse', help=reverse.__doc__)
        parser.add_argument('path', nargs='+', type=str, help="one or more path stems to look up. If you provide more than one, files which cannot be reversed will be omitted from the output.")
        parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
        parser.set_defaults(func=reverse) #action

        # adds the "path" command
        parser = subparsers.add_parser('path', help=path.__doc__)
        parser.add_argument('-d', '--directory', default='', help="if given, this path will be prepended to every entry returned.")
        parser.add_argument('-e', '--extension', default='', help="if given, this extension will be appended to every entry returned.")
        parser.add_argument('id', nargs='+', type=int, help="one or more file ids to look up. If you provide more than one, files which cannot be found will be omitted from the output. If you provide a single id to lookup, an error message will be printed if the id does not exist in the database. The exit status will be non-zero in such case.")
        parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
        parser.set_defaults(func=path) #action
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Table models and functionality for the KBOC16 database.
"""
import os, numpy
from sqlalchemy import Table, Column, Integer, String, ForeignKey, or_, and_, not_
from bob.db.base.sqlalchemy_migration import Enum, relationship
from sqlalchemy.orm import backref
from sqlalchemy.ext.declarative import declarative_base
import bob.db.verification.utils
# Declarative base class shared by every table model of this module
Base = declarative_base()
# Association table linking protocol purposes to files (one row per pair),
# used as the ``secondary`` table of the ProtocolPurpose.files relationship
protocolPurpose_file_association = Table('protocolPurpose_file_association', Base.metadata,
  Column('protocolPurpose_id', Integer, ForeignKey('protocolPurpose.id')),
  Column('file_id', Integer, ForeignKey('file.id')))
# Extension of the raw data files of the database
db_file_extension = '.txt'
class Client(Base):
    """Database clients, identified by a unique key.

    The key is stored in a string column of up to 100 characters. No other
    attribute (such as a group or a type) is stored for a client.
    """
    __tablename__ = 'client'
    # Key identifier for the client
    id = Column(String(100), primary_key=True)

    def __init__(self, id):
        # ``id`` is stored as given as the client's primary key
        self.id = id

    def __repr__(self):
        return "Client(`%s`)" % (self.id)
class File(Base, bob.db.verification.utils.File):
    """Generic file container

    Each row records the owning client, the path of the file, and the session
    and shot identifiers of the recording.
    """
    __tablename__ = 'file'
    # Key identifier for the file
    id = Column(Integer, primary_key=True)
    # Key identifier of the client associated with this file
    client_id = Column(String(100), ForeignKey('client.id')) # for SQL
    # Unique path to this file inside the database
    path = Column(String(100), unique=True)
    # Session identifier
    session_id = Column(Integer)
    # Shot identifier
    shot_id = Column(Integer)
    # For Python: A direct link to the client object that this file belongs to
    client = relationship("Client", backref=backref("files", order_by=id))

    def __init__(self, client_id, path, session_id, shot_id):
        # call base class constructor
        # (presumably it stores client_id and path on the instance - verify
        # against bob.db.verification.utils.File)
        bob.db.verification.utils.File.__init__(self, client_id = client_id, path = path)
        self.session_id = session_id
        self.shot_id = shot_id
class Protocol(Base):
    """KBOC16 protocols

    A protocol is a uniquely named entry; its purposes are reachable through
    the ``purposes`` back-reference declared on ProtocolPurpose.
    """
    __tablename__ = 'protocol'
    # Unique identifier for this protocol object
    id = Column(Integer, primary_key=True)
    # Name of the protocol associated with this object
    name = Column(String(20), unique=True)

    def __init__(self, name):
        # the integer ``id`` is not set here
        self.name = name

    def __repr__(self):
        return "Protocol('%s')" % (self.name,)
class ProtocolPurpose(Base):
    """KBOC16 protocol purposes

    Ties a protocol to a (group, purpose) pair and to the files that serve
    that purpose.
    """
    __tablename__ = 'protocolPurpose'
    # Unique identifier for this protocol purpose object
    id = Column(Integer, primary_key=True)
    # Id of the protocol associated with this protocol purpose object
    protocol_id = Column(Integer, ForeignKey('protocol.id')) # for SQL
    # Group associated with this protocol purpose object
    group_choices = ('eval',)
    sgroup = Column(Enum(*group_choices))
    # Purpose associated with this protocol purpose object
    purpose_choices = ('enrol', 'probe')
    purpose = Column(Enum(*purpose_choices))
    # For Python: A direct link to the Protocol object that this ProtocolPurpose belongs to
    protocol = relationship("Protocol", backref=backref("purposes", order_by=id))
    # For Python: A direct link to the File objects associated with this ProtocolPurpose
    files = relationship("File", secondary=protocolPurpose_file_association, backref=backref("protocolPurposes", order_by=id))

    def __init__(self, protocol_id, sgroup, purpose):
        # sgroup is expected to be one of group_choices and purpose one of
        # purpose_choices (the columns are Enum-typed)
        self.protocol_id = protocol_id
        self.sgroup = sgroup
        self.purpose = purpose

    def __repr__(self):
        return "ProtocolPurpose('%s', '%s', '%s')" % (self.protocol.name, self.sgroup, self.purpose)
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""This module provides the Dataset interface allowing the user to query the
KBOC16 database in the most obvious ways.
"""
import os
import six
from .models import *
from .driver import Interface
import bob.db.verification.utils
# Path of the SQLite back-end file: the first entry reported by the driver
# interface. Evaluated once, when this module is imported.
SQLITE_FILE = Interface().files()[0]
class Database(bob.db.verification.utils.SQLiteDatabase,bob.db.verification.utils.Database):
"""The dataset class opens and maintains a connection opened to the Database.
It provides many different ways to probe for the characteristics of the data
and for the data itself inside the database.
"""
def __init__(self, original_directory = None, original_extension = db_file_extension):
    """Open the SQLite back-end and record where the raw data lives.

    Keyword Parameters:

    original_directory
      Forwarded unchanged to the ``Database`` base class constructor.

    original_extension
      Forwarded unchanged to the ``Database`` base class constructor;
      defaults to ``db_file_extension``.
    """
    verification_utils = bob.db.verification.utils
    # both base classes are initialized explicitly, SQLite part first
    verification_utils.SQLiteDatabase.__init__(self, SQLITE_FILE, File)
    verification_utils.Database.__init__(
        self,
        original_directory=original_directory,
        original_extension=original_extension,
    )
def __group_replace_eval_by_genuine__(self, l):
    """Replace 'eval' by 'Genuine' and returns the new list

    Parameters:
      l -- a single group name, an iterable of group names, or a false value.

    Returns ``l`` itself when it is false; otherwise a tuple without
    duplicates (in no particular order) where every 'eval' is replaced by
    'Genuine', known client types are kept and anything else is dropped.
    """
    if not l:
        return l
    if isinstance(l, six.string_types):
        l = (l,)
    # The Client model in .models declares no ``type_choices``; reading the
    # attribute directly raised AttributeError for any value other than
    # 'eval'. Fall back to the single type this module works with.
    known_types = getattr(Client, 'type_choices', ('Genuine',))
    replaced = set()
    for val in l:
        if val == 'eval':
            replaced.add('Genuine')
        elif val in known_types:
            replaced.add(val)
    return tuple(replaced)
def groups(self, protocol=None):
    """Return the names of all registered groups.

    ``protocol`` is accepted but not used: the answer is always the tuple
    ``ProtocolPurpose.group_choices``.
    """
    registered_groups = ProtocolPurpose.group_choices
    return registered_groups
def client_types(self):
    """Returns the names of the types.

    The Client model in .models declares no ``type_choices``, so reading it
    directly raised AttributeError. If the model ever declares the attribute
    it is returned; otherwise the single type ``'Genuine'`` is reported.
    """
    return getattr(Client, 'type_choices', ('Genuine',))
def client_groups(self):
    """Return the names of the groups.

    This is specific to this database, which does not have separate
    training, development and evaluation sets: the result is the tuple
    ``ProtocolPurpose.group_choices``.
    """
    available_groups = ProtocolPurpose.group_choices
    return available_groups
def clients(self, protocol=None, groups=None):
    """Returns a list of :py:class:`.Client` for the specific query by the user.

    Keyword Parameters:

    protocol
      Ignored.

    groups
      One group name or a list of them, taken from
      ``ProtocolPurpose.group_choices`` ('eval'); 'Genuine' is accepted as
      an alias. The Client table stores nothing but the client id, so the
      value is only validated and never narrows the result.

    Returns: A list containing all the clients, ordered by id.

    Raises: whatever ``check_parameters_for_validity`` raises for an unknown
    group name.
    """
    # The previous implementation filtered on ``Client.stype`` and validated
    # against ``Client.type_choices``; neither exists on the Client model, so
    # every call raised AttributeError.
    valid_groups = tuple(ProtocolPurpose.group_choices) + ('Genuine',)
    self.check_parameters_for_validity(groups, "group", valid_groups)
    # List of the clients
    q = self.query(Client).order_by(Client.id)
    return list(q)
def models(self, protocol=None, groups=None):
    """Returns a list of :py:class:`.Client` for the specific query by the user.
    Models correspond to Clients for this database (At most one model per identity).

    Keyword Parameters:

    protocol
      Ignored.

    groups
      One group name or a list of them, taken from
      ``ProtocolPurpose.group_choices`` ('eval'); 'Genuine' is accepted as
      an alias. The Client table stores nothing but the client id, so the
      value is only validated and never narrows the result.

    Returns: A list containing all the models (model <-> client in KBOC16),
    ordered by id.

    Raises: whatever ``check_parameters_for_validity`` raises for an unknown
    group name.
    """
    # The previous implementation filtered on ``Client.stype``, a column the
    # Client model does not define, so every call raised AttributeError.
    valid_groups = tuple(ProtocolPurpose.group_choices) + ('Genuine',)
    self.check_parameters_for_validity(groups, "group", valid_groups)
    # List of the clients
    q = self.query(Client).order_by(Client.id)
    return list(q)
def model_ids(self, protocol=None, groups=None):
    """Returns a list of model ids for the specific query by the user.
    Models correspond to Clients for the KBOC16 database (At most one model per identity).

    Keyword Parameters:

    protocol
      Ignored.

    groups
      Forwarded unchanged to :py:meth:`models`.

    Returns: A list containing the ids of all the models (model <-> client in
    KBOC16) returned by :py:meth:`models` for the same arguments.
    """
    # ``Client`` only defines ``id``; the former ``client.subid`` raised
    # AttributeError.
    return [client.id for client in self.models(protocol, groups)]
def has_client_id(self, id):
"""Returns True if we have a client with a certain integer identifier"""