Commit 094837a8 authored by André Anjos's avatar André Anjos 💬
Browse files

Overall database refactoring; Improved docs and tests

parent 385fecec
Pipeline #15657 canceled with stages
in 2 minutes and 27 seconds
......@@ -18,3 +18,4 @@ build
*.egg
src/
db.sql3
bob/db/utfvp/data/
include LICENSE README.rst buildout.cfg develop.cfg requirements.txt version.txt
recursive-include doc *.py *.rst
recursive-include bob *.sql3
recursive-include bob *.sql3 *.csv *.txt
......@@ -5,7 +5,7 @@
"""
from .query import Database
from .models import Client, File, Protocol, Model
from .models import Client, Finger, File, Protocol, Subset
def get_config():
......@@ -15,28 +15,5 @@ def get_config():
return bob.extension.get_config(__name__)
# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
"""Says object was actually declared here, and not in the import module.
Parameters:
*args: An iterable of objects to modify; each one gets its ``__module__``
attribute overwritten with this module's ``__name__``
Resolves `Sphinx referencing issues
<https://github.com/sphinx-doc/sphinx/issues/3048>`_
"""
# rebinding __module__ is what makes each object look locally declared
for obj in args: obj.__module__ = __name__
__appropriate__(
Database,
Client,
File,
Protocol,
Model,
)
# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
This diff is collapsed.
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
"""Bob Database Driver entry-point for the UTFVP
"""Bob Database Driver entry-point for the UTFVP Fingervein database
"""
import os
import sys
import pkg_resources
from bob.db.base.driver import Interface as BaseInterface
......@@ -15,16 +16,11 @@ def dumplist(args):
from .query import Database
db = Database()
model_ids = None
if args.models is not None:
if isinstance(args.models, (list, tuple)): model_ids = args.models
else: model_ids = (args.models,)
r = db.objects(
protocol=args.protocol,
purposes=args.purpose,
model_ids=model_ids,
groups=args.group,
classes=args.sclass
purposes=args.purpose,
model_ids=args.models,
)
output = sys.stdout
......@@ -72,28 +68,33 @@ def checkfiles(args):
class Interface(BaseInterface):
def name(self):
return 'utfvp'
def version(self):
import pkg_resources # part of setuptools
return pkg_resources.require('bob.db.%s' % self.name())[0].version
def files(self):
basedir = pkg_resources.resource_filename(__name__, '')
filelist = os.path.join(basedir, 'files.txt')
with open(filelist, 'rt') as f:
return [os.path.join(basedir, k.strip()) for k in \
f.readlines() if k.strip()]
from pkg_resources import resource_filename
raw_files = ('db.sql3',)
return [resource_filename(__name__, k) for k in raw_files]
def type(self):
return 'sqlite'
def add_commands(self, parser):
from . import __doc__ as docs
subparsers = self.setup_parser(parser,
"UTFVP database", docs)
"UTFVP Fingervein database", docs)
# example: get the "create" action from a submodule
from .create import add_command as create_command
......@@ -109,7 +110,7 @@ class Interface(BaseInterface):
parser.add_argument('-e', '--extension', default='', help="if given, this extension will be appended to every entry returned.")
parser.add_argument('-p', '--protocol', help="if given, limits the dump to a particular subset of the data that corresponds to the given protocol.", choices=db.protocol_names() if db.is_valid() else ())
parser.add_argument('-u', '--purpose', help="if given, this value will limit the output files to those designed for the given purposes.", choices=db.purposes() if db.is_valid() else ())
parser.add_argument('-m', '--models', type=str, help="if given, limits the dump to a particular model", choices=db.model_ids() if db.is_valid() else ())
parser.add_argument('-m', '--models', type=str, help="if given, limits the dump to a particular model")
parser.add_argument('-g', '--group', help="if given, this value will limit the output files to those belonging to a particular protocolar group.", choices=db.groups() if db.is_valid() else ())
parser.add_argument('-c', '--class', dest='sclass', help="if given, this value will limit the output files to those belonging to the given classes.", choices=('client', 'impostor'))
parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
......@@ -118,6 +119,6 @@ class Interface(BaseInterface):
# the "checkfiles" action
parser = subparsers.add_parser('checkfiles', help=checkfiles.__doc__)
parser.add_argument('-d', '--directory', default='', help="if given, this path will be prepended to every entry returned.")
parser.add_argument('-e', '--extension', default='', help="if given, this extension will be appended to every entry returned.")
parser.add_argument('-e', '--extension', default='.png', help="if given, this extension will be appended to every entry returned.")
parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
parser.set_defaults(func=checkfiles) #action
This diff is collapsed.
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
"""Table models and functionality for the UTFVP database.
"""
import os, numpy
import os
import pkg_resources
import bob.io.base
import bob.io.image
import bob.db.base
from sqlalchemy import Table, Column, Integer, String, ForeignKey, or_, and_, not_
import numpy
from sqlalchemy import Table, Column, Integer, String, ForeignKey, Boolean
from sqlalchemy import or_, and_, not_
from sqlalchemy import UniqueConstraint
from bob.db.base.sqlalchemy_migration import Enum, relationship
from sqlalchemy.orm import backref
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
protocol_model_association = Table('protocol_model_association', Base.metadata,
Column('protocol_id', Integer, ForeignKey('protocol.id')),
Column('model_id', Integer, ForeignKey('model.id')))
protocol_trainfiles_association = Table('protocol_trainfiles_association', Base.metadata,
Column('protocol_id', Integer, ForeignKey('protocol.id')),
Column('file_id', Integer, ForeignKey('file.id')))
class Client(Base):
"""Unique clients in the database, referred by a single integer"""
model_probefile_association = Table('model_probefile_association', Base.metadata,
Column('model_id', Integer, ForeignKey('model.id')),
Column('file_id', Integer, ForeignKey('file.id')))
__tablename__ = 'client'
model_enrollmentfile_association = Table('model_enrollmentfile_association', Base.metadata,
Column('model_id', Integer, ForeignKey('model.id')),
Column('file_id', Integer, ForeignKey('file.id')))
id = Column(Integer, primary_key=True)
gender_choices = ('M', 'F')
gender = Column(Enum(*gender_choices))
class Client(Base):
"""Database clients, marked by an integer identifier and the group they belong to"""
age = Column(Integer)
__tablename__ = 'client'
handedness_choices = ('R', 'L', 'X')
handedness = Column(Enum(*handedness_choices))
# Key identifier for the client
id = Column(String(20), primary_key=True)
subclient_id = Column(Integer)
publishable = Column(Boolean)
daydiff = Column(Integer)
comment = Column(String(41))
def __init__(self, id, gender, age, handedness, publishable, daydiff,
comment):
def __init__(self, id, subclient_id):
self.id = id
self.subclient_id = subclient_id
self.gender = gender
self.age = age
self.handedness = handedness
self.publishable = publishable
self.daydiff = daydiff
self.comment = comment
def gender_display(self):
"""Returns a representation of the client gender"""
return 'male' if self.gender == 'M' else 'female'
def handedness_display(self):
"""Returns a representation of the client handedness"""
return {'L': 'left', 'R': 'right', 'X': 'unknown'}[self.handedness]
def __repr__(self):
return "Client(%s)" % (self.id,)
return "Client(%04d) <%s,%s>, %d years old" % \
(self.id, self.gender_display(), self.handedness_display(), self.age)
class Model(Base):
"""Database models, marked by an integer identifier and the group they belong to"""
class Finger(Base):
"""Unique fingers in the database, referred by a string
__tablename__ = 'model'
Fingers have the format ``0003_3`` (i.e. <client>_<finger>)
"""
__tablename__ = 'finger'
# Key identifier for the client
id = Column(Integer, primary_key=True)
# Name of the protocol associated with this object
name = Column(String(20))
# Group associated with this protocol purpose object
group_choices = ('dev', 'eval')
sgroup = Column(Enum(*group_choices))
# Key identifier of the client associated with this model
client_id = Column(String(20), ForeignKey('client.id')) # for SQL
# For Python: A direct link to the enrollment File objects associated with this Model
enrollment_files = relationship("File", secondary=model_enrollmentfile_association, backref=backref("models_enroll", order_by=id))
# For Python: A direct link to the probe File objects associated with this Model
probe_files = relationship("File", secondary=model_probefile_association, backref=backref("models_probe", order_by=id))
# For Python: A direct link to the client object that this model belongs to
client = relationship("Client", backref=backref("models", order_by=id))
def __init__(self, name, client_id, sgroup):
client_id = Column(Integer, ForeignKey('client.id'))
client = relationship("Client", backref=backref("fingers", order_by=id))
name_choices = ('1', '2', '3', '4', '5', '6')
name = Column(Enum(*name_choices))
UniqueConstraint('client_id', 'name')
# finger name enumerations relationship with human-readable versions
finger_names = {
'1': 'left ring',
'2': 'left middle',
'3': 'left index',
'4': 'right index',
'5': 'right middle',
'6': 'right ring',
}
def __init__(self, client, name):
self.client = client
self.name = name
self.client_id = client_id
self.sgroup = sgroup
def name_display(self):
"""Returns a representation of the finger side"""
return Finger.finger_names[self.name]
def __repr__(self):
return "Model(%s, %s)" % (self.name, self.sgroup)
return "Finger(%04d_%s)" % (self.client.id, self.name)
class File(Base, bob.db.base.File):
"""Generic file container"""
"""Unique files in the database, referred by a string
Files have the format ``0003/0003_3_2_121224-134932`` (i.e.
``<client>/<client>_<finger>_<session>_<date>-<hour>``)
"""
__tablename__ = 'file'
# Key identifier for the file
id = Column(Integer, primary_key=True)
# Key identifier of the client associated with this file
client_id = Column(String(20), ForeignKey('client.id')) # for SQL
finger_id = Column(Integer, ForeignKey('finger.id'))
finger = relationship("Finger", backref=backref("files", order_by=id))
session_choices = ('1', '2', '3', '4')
session = Column(Enum(*session_choices))
# Unique path to this file inside the database
path = Column(String(100), unique=True)
UniqueConstraint('finger_id', 'session')
# Identifier of the claimed client associated with this file
finger_id = Column(Integer)
# this column is not really required as it can be computed from other
# information already in the database, it is only an optimisation to allow us
# to quickly filter files by ``model_id``
model_id = Column(String(8), unique=True)
# Identifier of the session
session_id = Column(Integer)
# we don't use this, but store to retrieve filename from table row
date_hour = Column(String(13))
# For Python: A direct link to the client object that this file belongs to
client = relationship("Client", backref=backref("files", order_by=id))
def __init__(self, finger, session, date_hour):
self.finger = finger
self.session = session
self.model_id = '%04d_%s_%s' % (self.finger.client.id, self.finger.name,
self.session)
self.date_hour = date_hour
def __init__(self, client_id, path, finger_id, session_id):
# call base class constructor
bob.db.base.File.__init__(self, path = path)
@property
def path(self):
return '%04d/%04d_%s_%s_%s' % (self.finger.client.id,
self.finger.client.id, self.finger.name, self.session, self.date_hour)
#self.sgroup = sgroup
self.client_id = client_id
self.finger_id = finger_id
self.session_id = session_id
def __repr__(self):
return "<File('%s')>" % self.path
@property
def unique_finger_name(self):
"""Unique name for a given finger in the database"""
return '%04d_%s' % (self.finger.client.id, self.finger.name)
def load(self, directory=None, extension='.png'):
......@@ -138,22 +195,83 @@ class File(Base, bob.db.base.File):
return bob.io.base.load(self.make_path(directory, '.png'))
def roi(self):
"""Loads region-of-interest annotations for a particular image
The returned points (see return value below) correspond to a polygon in the
2D space delimiting the finger image. It is up to you to generate a mask
out of these annotations.
Returns:
numpy.ndarray: A 2D array of 16-bit unsigned integers corresponding to
annotations for the given fingervein image. Points are loaded in (y,x)
format so, the first column of the returned array corresponds to the
y-values while the second column to the x-values of each coordinate.
"""
# annotations are looked up inside this package, under data/annotations/roi
directory = pkg_resources.resource_filename(__name__,
os.path.join('data', 'annotations', 'roi'))
# no error handling here: any failure raised by numpy.loadtxt propagates
# to the caller; values are read with dtype uint16
return numpy.loadtxt(self.make_path(directory, '.txt'), dtype='uint16')
class Protocol(Base):
"""UTFVP protocols"""
"""UTFVP protocols"""
__tablename__ = 'protocol'
# Unique identifier for this protocol object
id = Column(Integer, primary_key=True)
# Name of the protocol associated with this object
name = Column(String(20), unique=True)
# For Python: A direct link to the DevModel objects associated with this Protocol
train_files = relationship("File", secondary=protocol_trainfiles_association, backref=backref("protocols_train", order_by=id))
models = relationship("Model", secondary=protocol_model_association, backref=backref("protocol", uselist=False, order_by=id))
# Name of the protocol
name = Column(String(15), unique=True)
def __init__(self, name):
self.name = name
def __repr__(self):
return "Protocol('%s')" % (self.name,)
return "Protocol('%s')" % self.name
subset_file_association = Table('subset_file_association', Base.metadata,
Column('file_id', Integer, ForeignKey('file.id')),
Column('subset_id', Integer, ForeignKey('subset.id')))
class Subset(Base):
"""UTFVP protocol subsets
Each row ties one protocol to a (group, purpose) pair and to the files that
are linked to that combination.
"""
__tablename__ = 'subset'
# Unique identifier for this subset object
id = Column(Integer, primary_key=True)
# Owning protocol; the backref exposes the reverse link as ``subsets`` on
# Protocol objects
protocol_id = Column(Integer, ForeignKey('protocol.id'))
protocol = relationship("Protocol", backref=backref("subsets"))
# Group of this subset, restricted to the choices listed below
group_choices = ('train', 'dev', 'eval')
group = Column(Enum(*group_choices))
# Purpose of this subset, restricted to the choices listed below
purpose_choices = ('train', 'enroll', 'probe')
purpose = Column(Enum(*purpose_choices))
# Many-to-many link to File objects through ``subset_file_association``; the
# backref exposes the reverse link as ``subsets`` on File objects
files = relationship("File",
secondary=subset_file_association,
backref=backref("subsets"))
# NOTE(review): presumably tells queries not to probe a model with its own
# enrollment sample -- confirm against the query code; defaults to False
avoid_self_probe = Column(Boolean, default=False)
def __init__(self, protocol, group, purpose):
self.protocol = protocol
self.group = group
self.purpose = purpose
def __repr__(self):
return "Subset(%s, %s, %s)" % (self.protocol, self.group, self.purpose)
This diff is collapsed.
This diff is collapsed.
.. vim: set fileencoding=utf-8 :
.. Thu 18 Aug 2016 18:03:09 CEST
==============
User's Guide
==============
This package contains the access API and descriptions for the `UTFVP Fingervein
database`_. It only contains the Bob_ accessor methods to use the DB directly
from python, with our certified protocols. The actual raw data for the `UTFVP
Fingervein database`_ should be downloaded from the original URL.
Database`_. It only contains the Bob_ accessor methods to use the DB directly
from python, with our certified protocols. The actual raw data for the dataset
should be downloaded from the original URL.
The Database Interface
----------------------
Data
----
The :py:class:`bob.db.utfvp.Database` complies with the standard Bob database
interface.
The fingervein image database consists of 1440 images taken in 2 distinct
sessions on two days (May 9th, 2012 and May 23rd, 2012) using a custom built
fingervein sensor. In each session, each of the 60 subjects in the dataset was
asked to present 6 fingers to the sensor twice, making up separate trials. The
six fingers are the left and right ring, middle and index fingers. Therefore,
the database contains 60x6 = 360 unique fingers.
.. todo::
Explain the particularities of the :py:class:`bob.db.utfvp.Database`.
Files in the database have a strict naming convention and are organized in
directories following their subject identifier like so:
``0003/0003_5_2_120509-141536``. The fields can be interpreted as
``<subject-id>/<subject-id>_<finger-name>_<trial>_<date>-<hour>``. The subject
identifier is written as a 4-digit number with leading zeroes, varying from 1
to 60. The finger name is one of the following:
* **1**: Left ring
* **2**: Left middle
* **3**: Left index
* **4**: Right index
* **5**: Right middle
* **6**: Right ring
The trial identifiers can vary between 1 and 4. The first two trials were
captured during the first session and the last two during the second session.
Given the difference in the images between trials on the same day, we assume
users were asked to remove the finger from the device and re-position it
afterwards.
Protocols
---------
There are 15 protocols implemented in this package:
* 1vsall
* nom
* nomLeftRing
* nomLeftMiddle
* nomLeftIndex
* nomRightIndex
* nomRightMiddle
* nomRightRing
* full
* fullLeftRing
* fullLeftMiddle
* fullLeftIndex
* fullRightIndex
* fullRightMiddle
* fullRightRing
They are described next.
"nom" Protocols
===============
"nom" means "normal operation mode". In this set of protocols, images from
different clients are separated in different sets that can be used for system
training, validation and evaluation:
* Fingers from clients in the range [1, 10] are used on the training set
* Fingers from clients in the range [11, 28] are used on the development
(or validation) set
* Fingers from clients in the range [29, 60] are used in the evaluation
(or test) set
Data from the first session (both trials) can be used for enrolling the finger
while data from the last session (both trials) should be used exclusively for
probing the finger. In the way this database interface is set up, each of the
samples is returned as a separate enrollment model. If a single score per
finger is required, the user must manipulate the final score listings and fuse
results themselves.
Matching happens exhaustively between all probes and models. The variants named
"nomLeftRing", for example, contain the data filtered by finger name as per the
listings above. For example, "Left Ring" means all files named
``*/*_1_*_*-*.png``. Therefore, the equivalent protocol contains only 1/6 of
the files of its complete ``nom`` version.
The following table specifies the number of samples in each set, together with
the counts of samples, models and probes in each ``nom`` protocol.
.. table:: Counts for the ``nom`` protocols
   :widths: auto

   ================== =============== ======== ======== ============== ======== ======== ==============
                      Training        Development                      Evaluation
   ------------------ --------------- -------------------------------- --------------------------------
   Protocol           Samples         Models   Probes   Probes/Model   Models   Probes   Probes/Model
   ================== =============== ======== ======== ============== ======== ======== ==============
   nom                240             216      216      216            384      384      384
   nomLeftRing        40              36       36       36             64       64       64
   nomLeftMiddle      40              36       36       36             64       64       64
   nomLeftIndex       40              36       36       36             64       64       64
   nomRightIndex      40              36       36       36             64       64       64
   nomRightMiddle     40              36       36       36             64       64       64
   nomRightRing       40              36       36       36             64       64       64
   ================== =============== ======== ======== ============== ======== ======== ==============

"full" Protocols
================
"full" protocols are meant to match current practices in fingervein reporting
in which most published material doesn't use a separate evaluation set. All data
is placed on the development (or validation) set. In these protocols, all
images are used both for enrolling and probing for fingers. It is, of course,
a biased setup. Matching happens exhaustively between all samples in the
development set.
The variants named "fullLeftRing", for example, contain the data filtered by
finger name as per the listings above. For example, "Left Ring" means all files
named ``*/*_1_*_*-*.png``. Therefore, the equivalent protocol contains only 1/6
of the files of its complete ``full`` version.
The following table specifies the number of samples in each set, together with
the counts of samples, models and probes in each ``full`` protocol.