Commit 1a0ead4d authored by Manuel Günther
Browse files

Renamed File.real_id to File.client_id to be consistent with other databases;...

Renamed File.real_id to File.client_id to be consistent with other databases; fixed small bug in {z,t}objects query.
parent e74a7392
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
# Fri 20 May 17:00:50 2011
# Fri 20 May 17:00:50 2011
"""This script creates the BANCA database in a single pass.
"""
......@@ -12,7 +12,7 @@ from .models import *
def add_files(session, imagedir, verbose):
"""Add files (and clients) to the BANCA database."""
def add_file(session, filename, client_dict, verbose):
"""Parse a single filename and add it to the list.
Also add a client entry if not already in the database."""
......@@ -26,9 +26,9 @@ def add_files(session, imagedir, verbose):
session_id = int(v[3].split('s')[1])
if verbose: print "Adding file '%s'..." %(os.path.basename(filename).split('.')[0], )
session.add(File(int(v[0]), os.path.basename(filename).split('.')[0], v[4], v[6], session_id))
file_list = os.listdir(imagedir)
client_dict = {}
client_dict = {}
for filename in file_list:
add_file(session, os.path.join(imagedir, filename), client_dict, verbose)
......@@ -38,8 +38,8 @@ def add_subworlds(session, verbose):
# one third and two thirds
snames = ["onethird", "twothirds"]
slist = [ [9003, 9005, 9027, 9033, 9035, 9043, 9049, 9053, 9055, 9057],
[9001, 9007, 9009, 9011, 9013, 9015, 9017, 9019, 9021, 9023,
9025, 9029, 9031, 9037, 9039, 9041, 9045, 9047, 9051, 9059] ]
[9001, 9007, 9009, 9011, 9013, 9015, 9017, 9019, 9021, 9023,
9025, 9029, 9031, 9037, 9039, 9041, 9045, 9047, 9051, 9059] ]
for k in range(len(snames)):
if verbose: print "Adding subworld '%s'" %(snames[k], )
su = Subworld(snames[k])
......@@ -57,7 +57,7 @@ def add_protocols(session, verbose):
# 1. DEFINITIONS
# Numbers in the lists correspond to session identifiers
protocol_definitions = {}
# Protocol Mc
enrol = [1]
probe_c = [2, 3, 4]
......@@ -69,37 +69,37 @@ def add_protocols(session, verbose):
probe_c = [6, 7, 8]
probe_i = [5, 6, 7, 8]
protocol_definitions['Md'] = [enrol, probe_c, probe_i]
# Protocol Ma
enrol = [9]
probe_c = [10, 11, 12]
probe_i = [9, 10, 11, 12]
protocol_definitions['Ma'] = [enrol, probe_c, probe_i]
# Protocol Ud
enrol = [1]
probe_c = [6, 7, 8]
probe_i = [5, 6, 7, 8]
protocol_definitions['Ud'] = [enrol, probe_c, probe_i]
# Protocol Ua
enrol = [1]
probe_c = [10, 11, 12]
probe_i = [9, 10, 11, 12]
protocol_definitions['Ua'] = [enrol, probe_c, probe_i]
# Protocol P
enrol = [1]
probe_c = [2, 3, 4, 6, 7, 8, 10, 11, 12]
probe_i = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
protocol_definitions['P'] = [enrol, probe_c, probe_i]
# Protocol G
enrol = [1, 5, 9]
probe_c = [2, 3, 4, 6, 7, 8, 10, 11, 12]
probe_i = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
protocol_definitions['G'] = [enrol, probe_c, probe_i]
# 2. ADDITIONS TO THE SQL DATABASE
protocolPurpose_list = [('world', 'train'), ('dev', 'enrol'), ('dev', 'probe'), ('eval', 'enrol'), ('eval', 'probe')]
for proto in protocol_definitions:
......@@ -126,12 +126,12 @@ def add_protocols(session, verbose):
elif(key == 3 or key == 4): client_group = "g2"
session_list = []
session_list_i = []
if(key == 1 or key == 3):
if(key == 1 or key == 3):
session_list = protocol_definitions[proto][0]
elif(key == 2):
elif(key == 2):
session_list = protocol_definitions[proto][1]
session_list_i = protocol_definitions[proto][2]
elif(key == 4):
elif(key == 4):
session_list = protocol_definitions[proto][1]
session_list_i = protocol_definitions[proto][2]
......@@ -145,7 +145,7 @@ def add_protocols(session, verbose):
else:
for sid in session_list:
q = session.query(File).join(Client).filter(Client.sgroup == client_group).\
filter(and_(File.session_id == sid, File.real_id == File.claimed_id)).\
filter(and_(File.session_id == sid, File.client_id == File.claimed_id)).\
order_by(File.id)
for k in q:
if verbose: print " Adding protocol file '%s'..." % (k.path)
......@@ -155,7 +155,7 @@ def add_protocols(session, verbose):
if session_list_i:
for sid in session_list_i:
q = session.query(File).join(Client).filter(Client.sgroup == client_group).\
filter(and_(File.session_id == sid, File.real_id != File.claimed_id)).\
filter(and_(File.session_id == sid, File.client_id != File.claimed_id)).\
order_by(File.id)
for k in q:
if verbose: print " Adding protocol file '%s'..." % (k.path)
......@@ -180,7 +180,7 @@ def create(args):
dbfile = args.files[0]
if args.recreate:
if args.recreate:
if args.verbose and os.path.exists(dbfile):
print('unlinking %s...' % dbfile)
if os.path.exists(dbfile): os.unlink(dbfile)
......@@ -209,5 +209,5 @@ def add_command(subparsers):
parser.add_argument('-D', '--imagedir', action='store', metavar='DIR',
default='/idiap/group/vision/visidiap/databases/banca/english/images_gray',
help="Change the relative path to the directory containing the images of the BANCA database (defaults to %(default)s)")
parser.set_defaults(func=create) #action
......@@ -49,16 +49,16 @@ class Client(Base):
return "Client(%d, '%s', '%s', '%s')" % (self.id, self.gender, self.sgroup, self.language)
class Subworld(Base):
"""Database clients belonging to the world group are split in two disjoint subworlds,
"""Database clients belonging to the world group are split in two disjoint subworlds,
onethird and twothirds"""
__tablename__ = 'subworld'
# Key identifier for this Subworld object
id = Column(Integer, primary_key=True)
# Subworld to which the client belongs to
name = Column(String(20), unique=True)
# for Python: A direct link to the client
clients = relationship("Client", secondary=subworld_client_association, backref=backref("subworld", order_by=id))
......@@ -76,7 +76,7 @@ class File(Base):
# Key identifier for the file
id = Column(Integer, primary_key=True)
# Key identifier of the client associated with this file
real_id = Column(Integer, ForeignKey('client.id')) # for SQL
client_id = Column(Integer, ForeignKey('client.id')) # for SQL
# Unique path to this file inside the database
path = Column(String(100), unique=True)
# Identifier of the claimed client associated with this file
......@@ -89,8 +89,8 @@ class File(Base):
# For Python: A direct link to the client object that this file belongs to
real_client = relationship("Client", backref=backref("files", order_by=id))
def __init__(self, real_id, path, claimed_id, shot_id, session_id):
self.real_id = real_id
def __init__(self, client_id, path, claimed_id, shot_id, session_id):
self.client_id = client_id
self.path = path
self.claimed_id = claimed_id
self.shot_id = shot_id
......
......@@ -25,7 +25,7 @@ class Database(object):
def __init__(self):
# Constructor: its only action is to call self.connect(), which opens the
# session to the database. The session is meant to stay open until the end;
# nothing in this constructor closes it.
self.connect()
def connect(self):
"""Tries connecting or re-connecting to the database"""
if not os.path.exists(SQLITE_FILE):
......@@ -46,7 +46,7 @@ class Database(object):
raise RuntimeError, "Database '%s' cannot be found at expected location '%s'. Create it and then try re-connecting using Database.connect()" % (INFO.name(), SQLITE_FILE)
def __group_replace_alias__(self, l):
"""Replace 'dev' by 'g1' and 'eval' by 'g2' in a list of groups, and
"""Replace 'dev' by 'g1' and 'eval' by 'g2' in a list of groups, and
returns the new list"""
if not l: return l
elif isinstance(l, str): return self.__group_replace_alias__((l,))
......@@ -60,7 +60,7 @@ class Database(object):
def __check_validity__(self, l, obj, valid, default):
"""Checks validity of user input data against a set of valid values"""
if not l: return default
elif not isinstance(l, (tuple,list)):
elif not isinstance(l, (tuple,list)):
return self.__check_validity__((l,), obj, valid, default)
for k in l:
if k not in valid:
......@@ -73,7 +73,7 @@ class Database(object):
return ProtocolPurpose.group_choices
def client_groups(self):
    """Returns the names of the client groups, i.e. ``Client.group_choices``.

    NOTE(review): the previous docstring called these the "XM2VTS groups" and
    stated that this database does not have separate training, development
    and evaluation sets. This module creates and queries the BANCA database,
    so that wording looks like a copy-and-paste leftover -- confirm.
    """
    return Client.group_choices
......@@ -85,7 +85,7 @@ class Database(object):
def languages(self):
    """Gives access to the languages known to the database.

    Returns: ``Client.language_choices``, handed back unmodified.
    """
    available_languages = Client.language_choices
    return available_languages
def subworld_names(self):
......@@ -130,8 +130,8 @@ class Database(object):
subworld
Specify a split of the world data ('onethird', 'twothirds')
In order to be considered, 'world' should be in groups and only one
split should be specified.
In order to be considered, 'world' should be in groups and only one
split should be specified.
Returns: A list containing all the clients which have the given properties.
"""
......@@ -177,7 +177,7 @@ class Database(object):
protocol
One of the BANCA protocols ('P', 'G', 'Mc', 'Md', 'Ma', 'Ud', 'Ua').
groups
The groups to which the clients belong ('g1', 'g2').
Note that 'dev' is an alias to 'g1' and 'eval' an alias to 'g2'
......@@ -203,7 +203,7 @@ class Database(object):
protocol
One of the BANCA protocols ('P', 'G', 'Mc', 'Md', 'Ma', 'Ud', 'Ua').
groups
The groups to which the clients belong ('g1', 'g2').
Note that 'dev' is an alias to 'g1' and 'eval' an alias to 'g2'
......@@ -230,7 +230,7 @@ class Database(object):
protocol
One of the BANCA protocols ('P', 'G', 'Mc', 'Md', 'Ma', 'Ud', 'Ua').
groups
The groups to which the subjects attached to the models belong ('g1', 'g2', 'world')
Note that 'dev' is an alias to 'g1' and 'eval' an alias to 'g2'
......@@ -247,7 +247,7 @@ class Database(object):
protocol
One of the BANCA protocols ('P', 'G', 'Mc', 'Md', 'Ma', 'Ud', 'Ua').
groups
The groups to which the clients belong ('g1', 'g2').
Note that 'dev' is an alias to 'g1' and 'eval' an alias to 'g2'
......@@ -272,7 +272,7 @@ class Database(object):
def get_client_id_from_model_id(self, model_id):
"""Returns the client_id attached to the given model_id
Keyword Parameters:
model_id
......@@ -284,7 +284,7 @@ class Database(object):
def get_client_id_from_tmodel_id(self, tmodel_id):
"""Returns the client_id attached to the given T-Norm model_id
Keyword Parameters:
tmodel_id
......@@ -294,7 +294,7 @@ class Database(object):
"""
return tmodel_id
def objects(self, protocol=None, purposes=None, model_ids=None, groups=None,
def objects(self, protocol=None, purposes=None, model_ids=None, groups=None,
classes=None, languages=None, subworld=None):
"""Returns a set of Files for the specific query by the user.
......@@ -305,35 +305,35 @@ class Database(object):
purposes
The purposes required to be retrieved ('enrol', 'probe', 'train') or a tuple
with several of them. If 'None' is given (this is the default), it is
with several of them. If 'None' is given (this is the default), it is
considered the same as a tuple with all possible values. This field is
ignored for the data from the "world" group.
model_ids
Only retrieves the files for the provided list of model ids (claimed
client id). If 'None' is given (this is the default), no filter over
Only retrieves the files for the provided list of model ids (claimed
client id). If 'None' is given (this is the default), no filter over
the model_ids is performed.
groups
One of the groups ('dev', 'eval', 'world') or a tuple with several of them.
If 'None' is given (this is the default), it is considered the same as a
One of the groups ('dev', 'eval', 'world') or a tuple with several of them.
If 'None' is given (this is the default), it is considered the same as a
tuple with all possible values.
classes
The classes (types of accesses) to be retrieved ('client', 'impostor')
or a tuple with several of them. If 'None' is given (this is the
The classes (types of accesses) to be retrieved ('client', 'impostor')
or a tuple with several of them. If 'None' is given (this is the
default), it is considered the same as a tuple with all possible values.
languages
The language spoken by the clients ('en')
TODO: only English is currently supported
If 'None' is given (this is the default), it is considered the same as a
If 'None' is given (this is the default), it is considered the same as a
tuple with all possible values.
subworld
Specify a split of the world data ('onethird', 'twothirds')
In order to be considered, 'world' should be in groups and only one
split should be specified.
In order to be considered, 'world' should be in groups and only one
split should be specified.
Returns: A list of files which have the given properties.
"""
......@@ -371,41 +371,41 @@ class Database(object):
filter(Client.language.in_(languages))
if model_ids:
q = q.filter(Client.id.in_(model_ids))
q = q.order_by(File.real_id, File.session_id, File.claimed_id, File.shot_id)
q = q.order_by(File.client_id, File.session_id, File.claimed_id, File.shot_id)
retval += list(q)
if ('dev' in groups or 'eval' in groups):
if('enrol' in purposes):
q = self.session.query(File).join(Client).join(ProtocolPurpose, File.protocolPurposes).join(Protocol).\
filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_(groups), ProtocolPurpose.purpose == 'enrol'))
if model_ids:
q = q.filter(Client.id.in_(model_ids))
q = q.order_by(File.real_id, File.session_id, File.claimed_id, File.shot_id)
q = q.order_by(File.client_id, File.session_id, File.claimed_id, File.shot_id)
retval += list(q)
if('probe' in purposes):
if('client' in classes):
q = self.session.query(File).join(Client).join(ProtocolPurpose, File.protocolPurposes).join(Protocol).\
filter(File.real_id == File.claimed_id).\
filter(File.client_id == File.claimed_id).\
filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_(groups), ProtocolPurpose.purpose == 'probe'))
if model_ids:
q = q.filter(Client.id.in_(model_ids))
q = q.order_by(File.real_id, File.session_id, File.claimed_id, File.shot_id)
q = q.order_by(File.client_id, File.session_id, File.claimed_id, File.shot_id)
retval += list(q)
if('impostor' in classes):
q = self.session.query(File).join(Client).join(ProtocolPurpose, File.protocolPurposes).join(Protocol).\
filter(File.real_id != File.claimed_id).\
filter(File.client_id != File.claimed_id).\
filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_(groups), ProtocolPurpose.purpose == 'probe'))
if model_ids:
q = q.filter(File.claimed_id.in_(model_ids))
q = q.order_by(File.real_id, File.session_id, File.claimed_id, File.shot_id)
q = q.order_by(File.client_id, File.session_id, File.claimed_id, File.shot_id)
retval += list(q)
return list(set(retval)) # To remove duplicates
def tobjects(self, protocol=None, model_ids=None, groups=None, languages=None):
"""Returns a set of Files for enrolling T-norm models for score
"""Returns a set of Files for enrolling T-norm models for score
normalization.
Keyword Parameters:
......@@ -414,8 +414,8 @@ class Database(object):
One of the BANCA protocols ('P', 'G', 'Mc', 'Md', 'Ma', 'Ud', 'Ua').
model_ids
Only retrieves the files for the provided list of model ids (claimed
client id). If 'None' is given (this is the default), no filter over
Only retrieves the files for the provided list of model ids (claimed
client id). If 'None' is given (this is the default), no filter over
the model_ids is performed.
groups
......@@ -424,10 +424,10 @@ class Database(object):
languages
The language spoken by the clients ('en')
TODO: only English is currently supported
If 'None' is given (this is the default), it is considered the same as a
If 'None' is given (this is the default), it is considered the same as a
tuple with all possible values.
Returns: A list of Files which have the given properties.
Returns: A list of Files which have the given properties.
"""
VALID_GROUPS = ('dev', 'eval')
......@@ -438,7 +438,7 @@ class Database(object):
tgroups.append('eval')
if 'eval' in groups:
tgroups.append('dev')
return self.objects(directory, extension, protocol, 'enrol', model_ids, tgroups, 'client', languages)
return self.objects(protocol, 'enrol', model_ids, tgroups, 'client', languages)
def zobjects(self, protocol=None, model_ids=None, groups=None, languages=None):
"""Returns a set of Files to perform Z-norm score normalization.
......@@ -449,8 +449,8 @@ class Database(object):
One of the BANCA protocols ('P', 'G', 'Mc', 'Md', 'Ma', 'Ud', 'Ua').
model_ids
Only retrieves the files for the provided list of model ids (claimed
client id). If 'None' is given (this is the default), no filter over
Only retrieves the files for the provided list of model ids (claimed
client id). If 'None' is given (this is the default), no filter over
the model_ids is performed.
groups
......@@ -459,7 +459,7 @@ class Database(object):
languages
The language spoken by the clients ('en')
TODO: only English is currently supported
If 'None' is given (this is the default), it is considered the same as a
If 'None' is given (this is the default), it is considered the same as a
tuple with all possible values.
Returns: A list of Files which have the given properties.
......@@ -473,7 +473,7 @@ class Database(object):
zgroups.append('eval')
if 'eval' in groups:
zgroups.append('dev')
return self.objects(directory, extension, protocol, 'probe', model_ids, zgroups, None, languages)
return self.objects(protocol, 'probe', model_ids, zgroups, None, languages)
def protocol_names(self):
"""Returns all registered protocol names"""
......@@ -560,4 +560,4 @@ class Database(object):
for p in paths:
retval.extend([k.id for k in fobj if k.path == p])
return retval
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment