From 098da6ea44502cbe0b3ded55a1a892ff1da4c2ed Mon Sep 17 00:00:00 2001 From: Amir MOHAMMADI <amir.mohammadi@idiap.ch> Date: Wed, 17 May 2017 12:58:37 +0200 Subject: [PATCH] Initial version of bob.db.swan --- .gitignore | 1 + MANIFEST.in | 2 +- bob/db/swan/create.py | 584 +++++++++++++++--------- bob/db/swan/models.py | 29 +- bob/db/swan/query.py | 503 ++++---------------- bob/db/swan/script/__init__.py | 0 bob/db/swan/script/generate_filelist.py | 13 - bob/db/swan/test.py | 51 +++ doc/conf.py | 1 - doc/index.rst | 16 +- doc/nitpick-exceptions.txt | 2 + requirements.txt | 11 + setup.py | 6 +- 13 files changed, 557 insertions(+), 662 deletions(-) delete mode 100644 bob/db/swan/script/__init__.py delete mode 100644 bob/db/swan/script/generate_filelist.py create mode 100644 doc/nitpick-exceptions.txt diff --git a/.gitignore b/.gitignore index befa401..3d37733 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ develop-eggs sphinx dist conda +*.sql3 diff --git a/MANIFEST.in b/MANIFEST.in index 9d10504..dc02b40 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ -include README.rst bootstrap-buildout.py buildout.cfg COPYING version.txt requirements.txt +include README.rst LICENSE version.txt requirements.txt recursive-include doc *.py *.rst *.ico *.png diff --git a/bob/db/swan/create.py b/bob/db/swan/create.py index a584d88..577b0d5 100644 --- a/bob/db/swan/create.py +++ b/bob/db/swan/create.py @@ -4,9 +4,249 @@ """This script creates the SWAN database in a single pass. """ -import os - from .models import Client, File, Protocol, ProtocolPurpose, Base +from collections import defaultdict +from glob import glob +import json +import os +from sqlalchemy import and_ + +SITE_MAPPING = { + '1': 'NTNU', + '2': 'UIO', + '3': 'MPH-FRA', + '4': 'IDIAP', + '6': 'MPH-IND', +} + +MODALITY_MAPPING = { + '1': 'face', + '2': 'voice', + '3': 'eye', + '4': 'finger', +} + +DEVICE_MAPPING = { + 'p': 'iPhone', + 't': 'iPad', +} + +SESSION1_DATAFORMAT = '''01 p 1.png:rear,4032x3024 +02 p 1.png:rear,4032x3024 +03 p 1.png:rear,4032x3024 +04 p 1.png:rear,4032x3024 +05 p 1.png:rear,4032x3024 +01 p 1.mp4:rear,1280x720,5s +02 p 1.mp4:rear,1280x720,5s +06 p 1.png:front,2576x1932 +07 p 1.png:front,2576x1932 +08 p 1.png:front,2576x1932 +09 p 1.png:front,2576x1932 +10 p 1.png:front,2576x1932 +03 p 1.mp4:front,1280x720,5s +04 p 1.mp4:front,1280x720,5s +01 t 1.png:rear,3264x2448 +02 t 1.png:rear,3264x2448 +03 t 1.png:rear,3264x2448 +04 t 1.png:rear,3264x2448 +05 t 1.png:rear,3264x2448 +01 t 1.mp4:rear,1280x720,5s +02 t 1.mp4:rear,1280x720,5s +06 t 1.png:front,1280x960 +07 t 1.png:front,1280x960 +08 t 1.png:front,1280x960 +09 t 1.png:front,1280x960 +10 t 1.png:front,1280x960 +03 t 1.mp4:front,1280x720,5s +04 t 1.mp4:front,1280x720,5s +01 p 2.mp4:front,1280x720 +02 p 2.mp4:front,1280x720 +03 p 2.mp4:front,1280x720 +04 p 2.mp4:front,1280x720 +05 p 2.mp4:front,1280x720 +06 p 2.mp4:front,1280x720 +07 p 2.mp4:front,1280x720 +08 p 2.mp4:front,1280x720 +01 t 2.mp4:front,1280x720 +02 t 2.mp4:front,1280x720 +03 t 2.mp4:front,1280x720 +04 t 2.mp4:front,1280x720 +05 t 2.mp4:front,1280x720 +06 t 2.mp4:front,1280x720 +07 t 2.mp4:front,1280x720 +08 t 2.mp4:front,1280x720 +01 p 3.png:rear,4032x3024 +02 p 3.png:rear,4032x3024 +03 p 3.png:rear,4032x3024 +04 p 3.png:rear,4032x3024 +05 p 3.png:rear,4032x3024 +01 p 3.mp4:rear,1280x720,5s +02 p 3.mp4:rear,1280x720,5s +06 p 3.png:front,2576x1932 +07 p 3.png:front,2576x1932 +08 p 3.png:front,2576x1932 +09 p 3.png:front,2576x1932 +10 p 3.png:front,2576x1932 +03 p 3.mp4:front,1280x720,5s +04 p 3.mp4:front,1280x720,5s +01 t 3.png:rear,3264x2448 +02 t 3.png:rear,3264x2448 +03 t 3.png:rear,3264x2448 +04 t 3.png:rear,3264x2448 +05 t 3.png:rear,3264x2448 +01 t 3.mp4:rear,1280x720,5s +02 t 3.mp4:rear,1280x720,5s +06 t 3.png:front,1280x960 +07 t 3.png:front,1280x960 +08 t 3.png:front,1280x960 +09 t 3.png:front,1280x960 +10 t 3.png:front,1280x960 +03 t 3.mp4:front,1280x720,5s +04 t 3.mp4:front,1280x720,5s +01 p 4.png:rear,4032x3024 +02 p 4.png:rear,4032x3024 +03 p 4.png:rear,4032x3024 +04 p 4.png:rear,4032x3024 +05 p 4.png:rear,4032x3024 +01 p 4.mp4:rear,1280x720,5s +02 p 4.mp4:rear,1280x720,5s +06 p 4.png:rear,4032x3024 +07 p 4.png:rear,4032x3024 +08 p 4.png:rear,4032x3024 +09 p 4.png:rear,4032x3024 +10 p 4.png:rear,4032x3024 +03 p 4.mp4:rear,1280x720,5s +04 p 4.mp4:rear,1280x720,5s +11 p 4.png:rear,4032x3024 +12 p 4.png:rear,4032x3024 +13 p 4.png:rear,4032x3024 +14 p 4.png:rear,4032x3024 +15 p 4.png:rear,4032x3024 +05 p 4.mp4:rear,1280x720,5s +06 p 4.mp4:rear,1280x720,5s +16 p 4.png:rear,4032x3024 +17 p 4.png:rear,4032x3024 +18 p 4.png:rear,4032x3024 +19 p 4.png:rear,4032x3024 +20 p 4.png:rear,4032x3024 +07 p 4.mp4:rear,1280x720,5s +08 p 4.mp4:rear,1280x720,5s +01 t 4.png:rear,3264x2448 +02 t 4.png:rear,3264x2448 +03 t 4.png:rear,3264x2448 +04 t 4.png:rear,3264x2448 +05 t 4.png:rear,3264x2448 +01 t 4.mp4:rear,1280x720,5s +02 t 4.mp4:rear,1280x720,5s +06 t 4.png:rear,3264x2448 +07 t 4.png:rear,3264x2448 +08 t 4.png:rear,3264x2448 +09 t 4.png:rear,3264x2448 +10 t 4.png:rear,3264x2448 +03 t 4.mp4:rear,1280x720,5s +04 t 4.mp4:rear,1280x720,5s +11 t 4.png:rear,3264x2448 +12 t 4.png:rear,3264x2448 +13 t 4.png:rear,3264x2448 +14 t 4.png:rear,3264x2448 +15 t 4.png:rear,3264x2448 +05 t 4.mp4:rear,1280x720,5s +06 t 4.mp4:rear,1280x720,5s +16 t 4.png:rear,3264x2448 +17 t 4.png:rear,3264x2448 +18 t 4.png:rear,3264x2448 +19 t 4.png:rear,3264x2448 +20 t 4.png:rear,3264x2448 +07 t 4.mp4:rear,1280x720,5s +08 t 4.mp4:rear,1280x720,5s +''' + +SESSION2_DATAFORMAT = '''01 p 1.mp4:front,1280x720,5s +02 p 1.mp4:front,1280x720,5s +01 p 2.mp4:front,1280x720 +02 p 2.mp4:front,1280x720 +03 p 2.mp4:front,1280x720 +04 p 2.mp4:front,1280x720 +05 p 2.mp4:front,1280x720 +06 p 2.mp4:front,1280x720 +07 p 2.mp4:front,1280x720 +08 p 2.mp4:front,1280x720 +01 p 3.png:rear,4032x3024 +02 p 3.png:rear,4032x3024 +03 p 3.png:rear,4032x3024 +04 p 3.png:rear,4032x3024 +05 p 3.png:rear,4032x3024 +01 p 3.mp4:rear,1280x720,5s +02 p 3.mp4:rear,1280x720,5s +06 p 3.png:front,2576x1932 +07 p 3.png:front,2576x1932 +08 p 3.png:front,2576x1932 +09 p 3.png:front,2576x1932 +10 p 3.png:front,2576x1932 +03 p 3.mp4:front,1280x720,5s +04 p 3.mp4:front,1280x720,5s +01 p 4.png:rear,4032x3024 +02 p 4.png:rear,4032x3024 +03 p 4.png:rear,4032x3024 +04 p 4.png:rear,4032x3024 +05 p 4.png:rear,4032x3024 +01 p 4.mp4:rear,1280x720,5s +02 p 4.mp4:rear,1280x720,5s +06 p 4.png:rear,4032x3024 +07 p 4.png:rear,4032x3024 +08 p 4.png:rear,4032x3024 +09 p 4.png:rear,4032x3024 +10 p 4.png:rear,4032x3024 +03 p 4.mp4:rear,1280x720,5s +04 p 4.mp4:rear,1280x720,5s +11 p 4.png:rear,4032x3024 +12 p 4.png:rear,4032x3024 +13 p 4.png:rear,4032x3024 +14 p 4.png:rear,4032x3024 +15 p 4.png:rear,4032x3024 +05 p 4.mp4:rear,1280x720,5s +06 p 4.mp4:rear,1280x720,5s +16 p 4.png:rear,4032x3024 +17 p 4.png:rear,4032x3024 +18 p 4.png:rear,4032x3024 +19 p 4.png:rear,4032x3024 +20 p 4.png:rear,4032x3024 +07 p 4.mp4:rear,1280x720,5s +08 p 4.mp4:rear,1280x720,5s +''' + +KNOWLEDGE = { + '01': {'total': 128, 'video': 24, 'image': 40, 'device': ['p', 't']}, + '02': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']}, + '03': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']}, + '04': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']}, + '05': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']}, + '06': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']}, + '07': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']}, +} + + +def parse_data_format(lines): + data_format = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))) + for line in lines.split('\n'): + if not line: + continue + nrecording, device, biometrics = line.split() + biometrics, extension = biometrics.split('.') + extension, data = extension.split(':') + data = data.split(',') + data_format[device][biometrics][extension][nrecording] = data + return json.loads(json.dumps(data_format)) + + +KNOWLEDGE['01']['data'] = parse_data_format(SESSION1_DATAFORMAT) +__session2_data_format_loaded = parse_data_format(SESSION2_DATAFORMAT) +KNOWLEDGE['02']['data'] = __session2_data_format_loaded +KNOWLEDGE['03']['data'] = __session2_data_format_loaded +KNOWLEDGE['04']['data'] = __session2_data_format_loaded +KNOWLEDGE['05']['data'] = __session2_data_format_loaded +KNOWLEDGE['06']['data'] = __session2_data_format_loaded def nodot(item): @@ -14,168 +254,127 @@ def nodot(item): return item[0] != '.' -def add_clients_and_files(sql_session, datadir, extensions, verbose): +def add_clients_and_files(sql_session, datadir, verbose): """Add files to the SWAN database.""" - def add_file(session, datadir, location, client_id_dir, session_device, basename, verbose): + client_dict = {} + + def add_file(sql_session, fullpath, verbose): """Parse a single filename and add it to the list. - Also add a client entry if not already in the database.""" - v = os.path.splitext(basename)[0].split('_') - bname = os.path.splitext(basename)[0] + Also add a client entry if not already in the database. + Example: IDIAP/session_01/iPhone/00001/4_00001_m_01_01_p_1.mp4 + + """ + basename = os.path.basename(fullpath) + site, identity, gender, session, nrecording, device, biometrics = \ + basename.split('_') + biometrics, extension = biometrics.split('.') + + try: + camera = KNOWLEDGE[session]['data'][ + device][biometrics][extension][nrecording] + except KeyError: + print('File found but not in KNOWLEDGE! {}'.format(fullpath)) + return + camera = camera[0] + modality = MODALITY_MAPPING[biometrics] + site = SITE_MAPPING[site] + device = DEVICE_MAPPING[device] + + if extension == 'mp4': + recording = 'video' + elif extension == 'png': + recording = 'photo' + else: + raise RuntimeError('Unknown file extension {}'.format(extension)) + full_bname = os.path.join( - location, client_id_dir, session_device, bname) + site, 'session_{}'.format(session), device, identity, + basename) - gender = '' - if v[0][0] == 'm': + if gender == 'm': gender = 'male' - if v[0][0] == 'f': + elif gender == 'f': gender = 'female' - institute = int(v[0][1]) - institute_dir = '' - if institute == 0: - institute = 'idiap' - institute_dir = 'idiap' - elif institute == 1: - institute = 'manchester' - institute_dir = 'uman' - elif institute == 2: - institute = 'surrey' - institute_dir = 'unis' - elif institute == 3: - institute = 'oulu' - institute_dir = 'uoulu' - elif institute == 4: - institute = 'brno' - institute_dir = 'but' - elif institute == 5: - institute = 'avignon' - institute_dir = 'lia' - if institute_dir != location: - error_msg = "File: %s -- Find location %s in directory of location %s!" % ( - full_bname, location, institute_dir) - raise RuntimeError(error_msg) - client_id = v[0][1:4] - if v[0][0:4] != client_id_dir: - error_msg = "File: %s -- Find identity %s in directory of identity %s!" % ( - full_bname, v[0][0:4], client_id) - raise RuntimeError(error_msg) - if not (client_id in client_dict): - if (institute == 'surrey' or institute == 'avignon'): - group = 'world' - elif (institute == 'manchester' or institute == 'oulu'): - group = 'dev' - elif (institute == 'idiap' or institute == 'brno'): - group = 'eval' + else: + raise RuntimeError('Unknown gender {}'.format(gender)) + + # identify the group of this identity + identity = int(identity) + if identity < 25: + group = 'world' + elif identity < 41: + group = 'dev' + elif identity < 61: + group = 'eval' + else: + raise RuntimeError('Unknown IDIAP identity {}'.format(identity)) + + # check if the client has been added yet. + client_key = (identity, site) + if client_key not in client_dict: if verbose > 1: - print(" Adding client %d..." % int(client_id)) - session.add(Client(int(client_id), group, gender, institute)) - client_dict[client_id] = True - - w = session_device.split('_') - session_id_from_dir = int(w[0]) - device_from_dir = w[1] - - session_id = int(v[1]) - speech_type = v[2][0] - shot_id = v[2][1:3] - environment = v[3][0] - device = v[3][1] - if(device == '0'): - device = 'mobile' - elif(device == '1'): - device = 'laptop' - if device != device_from_dir: - error_msg = "File: %s -- Find device %s in directory of device %s!" % ( - full_bname, device, device_from_dir) - raise RuntimeError(error_msg) - if session_id != session_id_from_dir: - error_msg = "File: %s -- Find session_id %d in directory of session_id %d!" % ( - full_bname, session_id, session_id_from_dir) - raise RuntimeError(error_msg) - channel = int(v[4][0]) - + print(" Adding client {}, {}...".format(site, identity)) + client = Client(identity, group, gender, site) + sql_session.add(client) + # sql_session.flush() + # sql_session.refresh(client) + client_dict[client_key] = True + else: + client = sql_session.query(Client).filter( + and_(Client.orig_id == identity, + Client.institute == site)).one() if verbose > 1: print(" Adding file '%s'..." % full_bname) - session.add(File(int(client_id), full_bname, session_id, - speech_type, shot_id, environment, device, channel)) + sql_session.add(File(full_bname, client, int(session), device, + modality, camera, recording, int(nrecording))) - client_dict = {} if verbose: print("Adding clients and files ...") - for location in filter(nodot, os.listdir(datadir)): - location_dir = os.path.join(datadir, location) - if os.path.isdir(location_dir): - for client_id in filter(nodot, os.listdir(location_dir)): - client_dir = os.path.join(location_dir, client_id) - if os.path.isdir(client_dir): - for session_device in filter(nodot, os.listdir(client_dir)): - session_device_dir = os.path.join( - client_dir, session_device) - if os.path.isdir(session_device_dir): - for filename in filter(nodot, os.listdir(session_device_dir)): - for ext in extensions: - if filename.endswith(ext): - add_file(sql_session, datadir, location, client_id, session_device, os.path.basename( - filename), verbose) + # files = open('files.txt').read().split() + files = [] + for _, site in SITE_MAPPING.items(): + files += glob(os.path.join(datadir, site, '*', '*', '*', '*.mp4')) + files += glob(os.path.join(datadir, site, '*', '*', '*', '*.png')) + for fullpath in files: + add_file(sql_session, fullpath, verbose) def add_protocols(session, verbose): """Adds protocols""" # 1. DEFINITIONS + mobile_tablet = ('iPhone', 'iPad') + mobile = ('iPhone',) + ididap_clients = list(range(1, 61)) + non_idiap_clients = [7, 8, 9, 23, 37, 44, 45, 53, 56, 57] + ididap_clients = [x for x in ididap_clients if x not in non_idiap_clients] + # Numbers in the lists correspond to session identifiers - protocol_definitions = {} - - # Split male and female clients: list of (client_id, first_session_id) # - # few exceptions with 2 as first session - clients_male = [(1, 1), (2, 1), (4, 1), (8, 1), (11, 1), (12, 1), (15, 1), (16, 1), (17, 1), (19, 2), - (21, 1), (23, 1), (24, 1), (25, 1), (26, - 1), (28, 1), (29, 1), (30, 1), (31, 1), (33, 1), - (34, 1), (103, 1), (104, 1), (106, 1), (107, - 1), (108, 1), (109, 1), (111, 1), (112, 1), (114, 1), - (115, 1), (116, 1), (117, 1), (119, 1), (120, - 1), (301, 1), (304, 1), (305, 1), (308, 1), (310, 1), - (313, 1), (314, 1), (315, 1), (317, 1), (319, - 1), (416, 1), (417, 1), (418, 1), (419, 1), (420, 1), - (421, 1), (422, 1), (423, 1), (424, 1), (425, - 1), (426, 1), (427, 1), (428, 1), (429, 1), (430, 1), - (431, 1), (432, 1)] - clients_female = [(7, 2), (9, 1), (10, 1), (22, 1), (32, 1), (118, 1), (122, 1), (123, 1), (125, 1), (126, 1), - (127, 1), (128, 1), (129, 1), (130, 1), (131, - 1), (133, 1), (302, 1), (303, 1), (306, 1), (307, 1), - (309, 1), (311, 1), (320, 1), (401, 1), (402, - 1), (403, 1), (404, 1), (405, 2), (406, 1), (407, 1), - (408, 1), (409, 1), (410, 1), (411, 1), (412, 1), (413, 1), (415, 1), (433, 1)] - train_mobile = ['mobile'] - train_all = None - enroll_laptop = [['laptop'], ['p']] - enroll_mobile = [['mobile'], ['p']] - enroll_laptop_mobile = [['laptop', 'mobile'], ['p']] - probe = [['mobile'], ['r', 'f']] - gender_male = 'male' - gender_female = 'female' - protocol_definitions['mobile0-male'] = [clients_male, - train_mobile, enroll_mobile, probe, gender_male] - protocol_definitions['mobile0-female'] = [clients_female, - train_mobile, enroll_mobile, probe, gender_female] - protocol_definitions['mobile1-male'] = [clients_male, - train_all, enroll_mobile, probe, gender_male] - protocol_definitions['mobile1-female'] = [clients_female, - train_all, enroll_mobile, probe, gender_female] - protocol_definitions['laptop1-male'] = [clients_male, - train_all, enroll_laptop, probe, gender_male] - protocol_definitions['laptop1-female'] = [clients_female, - train_all, enroll_laptop, probe, gender_female] - protocol_definitions['laptop_mobile1-male'] = [clients_male, - train_all, enroll_laptop_mobile, probe, gender_male] - protocol_definitions['laptop_mobile1-female'] = [clients_female, - train_all, enroll_laptop_mobile, probe, gender_female] + protocol_definitions = defaultdict(lambda: defaultdict(dict)) + protocol_definitions['idiap0-audio']['world']['train'] = \ + (mobile_tablet, [x for x in ididap_clients if x < 25], + ('voice',), ('front',), ('video',), tuple(range(1, 7)), ('IDIAP', )) + protocol_definitions['idiap0-audio']['dev']['enroll'] = \ + (mobile, [x for x in ididap_clients if (x >= 25) and (x < 41)], + ('voice',), ('front',), ('video',), tuple(range(1, 2)), ('IDIAP', )) + protocol_definitions['idiap0-audio']['dev']['probe'] = \ + (mobile, [x for x in ididap_clients if (x >= 25) and (x < 41)], + ('voice',), ('front',), ('video',), tuple(range(2, 7)), ('IDIAP', )) + protocol_definitions['idiap0-audio']['eval']['enroll'] = \ + (mobile, [x for x in ididap_clients if (x >= 41) and (x < 61)], + ('voice',), ('front',), ('video',), tuple(range(1, 2)), ('IDIAP', )) + protocol_definitions['idiap0-audio']['eval']['probe'] = \ + (mobile, [x for x in ididap_clients if (x >= 41) and (x < 61)], + ('voice',), ('front',), ('video',), tuple(range(2, 7)), ('IDIAP', )) # 2. ADDITIONS TO THE SQL DATABASE - protocolPurpose_list = [('world', 'train'), ('dev', 'enroll'), - ('dev', 'probe'), ('eval', 'enroll'), ('eval', 'probe')] + protocolPurpose_list = [ + ('world', 'train'), ('dev', 'enroll'), ('dev', 'probe'), + ('eval', 'enroll'), ('eval', 'probe')] + for proto in protocol_definitions: - p = Protocol(proto, protocol_definitions[proto][4]) + p = Protocol(proto) # Add protocol if verbose: print("Adding protocol '%s'..." % (proto)) @@ -184,81 +383,48 @@ def add_protocols(session, verbose): session.refresh(p) # Add protocol purposes - for key in range(len(protocolPurpose_list)): - purpose = protocolPurpose_list[key] - pu = ProtocolPurpose(p.id, purpose[0], purpose[1]) + for group, purpose in protocolPurpose_list: + pu = ProtocolPurpose(p.id, group, purpose) if verbose > 1: print(" Adding protocol purpose ('%s','%s')..." % - (purpose[0], purpose[1])) + (group, purpose)) session.add(pu) session.flush() session.refresh(pu) - # Add files attached with this protocol purpose - client_group = "" - device_list = [] - speech_list = [] - if(key == 0): - client_group = "world" - elif(key == 1 or key == 2): - client_group = "dev" - elif(key == 3 or key == 4): - client_group = "eval" - if(key == 0): - world_list = True - session_list_in = False - device_list = protocol_definitions[proto][1] - if(key == 1 or key == 3): - world_list = False - session_list_in = True - device_list = protocol_definitions[proto][2][0] - speech_list = protocol_definitions[proto][2][1] - elif(key == 2 or key == 4): - world_list = False - session_list_in = False - device_list = protocol_definitions[proto][3][0] - speech_list = protocol_definitions[proto][3][1] + # get the list of files for that group + q = session.query(File).join(Client).filter( + Client.sgroup == group).order_by(File.id) - # Adds 'protocol' files - # World set - if world_list: - q = session.query(File).join(Client).filter( - Client.sgroup == 'world').order_by(File.id) - if device_list: - q = q.filter(File.device.in_(device_list)) - for k in q: - if verbose > 1: - print(" Adding protocol file '%s'..." % (k.path)) - pu.files.append(k) - # Dev/eval set - else: - for client in protocol_definitions[proto][0]: - cid = client[0] # client id - sid = client[1] # session id - q = session.query(File).join(Client).\ - filter(Client.sgroup == client_group).filter( - Client.id == cid) - if session_list_in: - q = q.filter(File.session_id == sid) - else: - q = q.filter(File.session_id != sid) - if device_list: - q = q.filter(File.device.in_(device_list)) - if speech_list: - q = q.filter(File.speech_type.in_(speech_list)) - q = q.order_by(File.id) - for k in q: - if verbose > 1: - print(" Adding protocol file '%s'..." % - (k.path)) - pu.files.append(k) + device_list = protocol_definitions[proto][group][purpose][0] + if device_list: + q = q.filter(File.device.in_(device_list)) - # Add protocol - speech_type = ['p', 'l', 'r', 'f'] - mobile_only = False - if 'mobile0' in proto: - mobile_only = True - add_tmodels(session, p.id, mobile_only, speech_type, verbose) + modality_list = protocol_definitions[proto][group][purpose][2] + if modality_list: + q = q.filter(File.modality.in_(modality_list)) + + camera_list = protocol_definitions[proto][group][purpose][3] + if camera_list: + q = q.filter(File.camera.in_(camera_list)) + + recording_list = protocol_definitions[proto][group][purpose][4] + if recording_list: + q = q.filter(File.recording.in_(recording_list)) + + session_list = protocol_definitions[proto][group][purpose][5] + if session_list: + q = q.filter(File.session.in_(session_list)) + + institute_list = protocol_definitions[proto][group][purpose][6] + if institute_list: + q = q.filter(Client.institute.in_(institute_list)) + + # Adds 'protocol' files + for k in q: + if verbose > 1: + print(" Adding protocol file '%s'..." % (k.path)) + pu.files.append(k) def create_tables(args): @@ -293,7 +459,7 @@ def create(args): # the real work... create_tables(args) s = session_try_nolock(args.type, args.files[0], echo=(args.verbose > 2)) - add_files(s, args.datadir, args.extensions, args.verbose) + add_clients_and_files(s, args.datadir, args.verbose) add_protocols(s, args.verbose) s.commit() s.close() diff --git a/bob/db/swan/models.py b/bob/db/swan/models.py index cb1b5cd..702fc27 100644 --- a/bob/db/swan/models.py +++ b/bob/db/swan/models.py @@ -59,26 +59,43 @@ class File(Base, bob.db.base.File): # Key identifier for the file id = Column(Integer, primary_key=True) - # Key identifier of the client associated with this file - client_id = Column(Integer, ForeignKey('client.id')) # for SQL # Unique path to this file inside the database path = Column(String(100), unique=True) # Identifier of the session - session_id = Column(Integer) + session = Column(Integer) # Identifier of the device - device_choices = ('mobile', 'tablet') + device_choices = ('iPhone', 'iPad') device = Column(Enum(*device_choices)) + modality_choices = ('face', 'voice', 'eye', 'finger') + modality = Column(Enum(*modality_choices)) + + camera_choices = ('rear', 'front') + camera = Column(Enum(*camera_choices)) + + recording_choices = ('video', 'photo') + recording = Column(Enum(*recording_choices)) + + nrecording = Column(Integer) + + # Key identifier of the client associated with this file + client_id = Column(Integer, ForeignKey('client.id')) # for SQL # For Python: A direct link to the client object that this file belongs to client = relationship("Client", backref=backref("files", order_by=id)) - def __init__(self, client_id, path, session_id, device): + def __init__(self, path, client, session, device, modality, + camera, recording, nrecording): # call base class constructor bob.db.base.File.__init__(self, path=path) # fill the remaining bits of the file information - self.client_id = client_id + self.client = client + self.session = session self.device = device + self.modality = modality + self.camera = camera + self.recording = recording + self.nrecording = nrecording class Protocol(Base): diff --git a/bob/db/swan/query.py b/bob/db/swan/query.py index 00b11d4..e8876c8 100644 --- a/bob/db/swan/query.py +++ b/bob/db/swan/query.py @@ -2,29 +2,30 @@ # vim: set fileencoding=utf-8 : """This module provides the Dataset interface allowing the user to query the -MOBIO database in the most obvious ways. +SWAN database in the most obvious ways. """ -import six -from .models import * from .driver import Interface +from .models import File, Client, Protocol, ProtocolPurpose from sqlalchemy import and_, not_ - import bob.db.base SQLITE_FILE = Interface().files()[0] class Database(bob.db.base.SQLiteDatabase): - """The dataset class opens and maintains a connection opened to the Database. + """The dataset class opens and maintains a connection opened to the + Database. - It provides many different ways to probe for the characteristics of the data - and for the data itself inside the database. + It provides many different ways to probe for the characteristics of the + data and for the data itself inside the database. """ - def __init__(self, original_directory=None, original_extension=None, annotation_directory=None, annotation_extension='.pos'): + def __init__(self, original_directory=None, original_extension=None, + annotation_directory=None, annotation_extension='.pos'): # call base class constructors to open a session to the database - super(Database, self).__init__(SQLITE_FILE, File) + super(Database, self).__init__( + SQLITE_FILE, File, original_directory, original_extension) self.annotation_directory = annotation_directory self.annotation_extension = annotation_extension @@ -32,85 +33,73 @@ class Database(bob.db.base.SQLiteDatabase): def groups(self, protocol=None): """Returns the names of all registered groups""" - return ProtocolPurpose.group_choices + return Client.group_choices def genders(self): """Returns the list of genders""" return Client.gender_choices - def subworld_names(self): - """Returns all registered subworld names""" + def institutes(self): + """Returns the list of institutes""" - self.assert_validity() - l = self.subworlds() - retval = [str(k.name) for k in l] + return Client.institute_choices + + def protocol_names(self): + """Returns all registered protocol names""" + + retval = [str(k.name) for k in self.protocols()] return retval - def subworlds(self): - """Returns the list of subworlds""" + def protocols(self): + """Returns all registered protocols""" - return list(self.query(Subworld)) + return list(self.query(Protocol)) - def has_subworld(self, name): - """Tells if a certain subworld is available""" + def has_protocol(self, name): + """Tells if a certain protocol is available""" - self.assert_validity() - return self.query(Subworld).filter(Subworld.name == name).count() != 0 - - def _replace_protocol_alias(self, protocol): - if protocol == 'male': - return 'mobile0-male' - elif protocol == 'female': - return 'mobile0-female' - else: - return protocol - - def _replace_protocols_alias(self, protocol): - # print(protocol) - if protocol: - from six import string_types - if isinstance(protocol, string_types): - # print([self._replace_protocol_alias(protocol)]) - return [self._replace_protocol_alias(protocol)] - else: - # print(list(set(self._replace_protocol_alias(k) for k in protocols))) - return list(set(self._replace_protocol_alias(k) for k in protocols)) - else: - return None + return self.query(Protocol).filter(Protocol.name == name).count() != 0 - def clients(self, protocol=None, groups=None, subworld=None, gender=None): - """Returns a list of Clients for the specific query by the user. + def protocol(self, name): + """Returns the protocol object in the database given a certain name. + Raises an error if that does not exist.""" - Keyword Parameters: + return self.query(Protocol).filter(Protocol.name == name).one() - protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ - 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') - 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. + def protocol_purposes(self): + """Returns all registered protocol purposes""" - groups - The groups to which the clients belong ('dev', 'eval', 'world') - Please note that world data are protocol/gender independent + return list(self.query(ProtocolPurpose)) - subworld - Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled') - In order to be considered, 'world' should be in groups and only one - split should be specified. + def purposes(self): + """Returns the list of allowed purposes""" - gender + return ProtocolPurpose.purpose_choices + + def clients(self, protocol=None, groups=None, gender=None, institute=None): + """Returns a list of Clients for the specific query by the user. + + Parameters + ---------- + protocol : :py:obj:`str` or :py:obj:`None`, optional + One of the SWAN protocols. See :py:meth:`protocol_names` + groups : :py:obj:`str` or :py:obj:`None`, optional + The groups to which the clients belong ('dev', 'eval', 'world') + gender : :py:obj:`str` or :py:obj:`None`, optional The gender to consider ('male', 'female') + institute : :py:obj:`str` or :py:obj:`None`, optional + The institute of the client. See :py:meth:`institutes`. - Returns: A list containing all the clients which have the given properties. + Returns + ------- + list of :py:class:`bob.db.swan.models.Client` + A list containing all the clients which have the given properties. """ - - protocol = self._replace_protocols_alias(protocol) protocol = self.check_parameters_for_validity( protocol, "protocol", self.protocol_names(), []) groups = self.check_parameters_for_validity( groups, "group", self.groups(), self.groups()) - subworld = self.check_parameters_for_validity( - subworld, "subworld", self.subworld_names(), []) gender = self.check_parameters_for_validity( gender, "gender", self.genders(), []) @@ -118,9 +107,6 @@ class Database(bob.db.base.SQLiteDatabase): retval = [] if 'world' in groups: q = self.query(Client).filter(Client.sgroup == 'world') - if subworld: - q = q.join((Subworld, Client.subworld)).filter( - Subworld.name.in_(subworld)) if gender: q = q.filter(Client.gender.in_(gender)) q = q.order_by(Client.id) @@ -158,67 +144,13 @@ class Database(bob.db.base.SQLiteDatabase): return self.query(Client).filter(Client.id == id).one() - def tclients(self, protocol=None, groups=None, subworld='onethird', gender=None): - """Returns a set of T-Norm clients for the specific query by the user. - - Keyword Parameters: - - protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ - 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') - 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. - - groups - Ignored. - For the MOBIO database, this has no impact as the T-Norm clients are coming from - the 'world' set, and are hence the same for both the 'dev' and 'eval' sets. - - subworld - Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled') - Please note that 'onethird' is the default value. - - gender - The gender to consider ('male', 'female') - - Returns: A list containing all the T-norm clients belonging to the given group. - """ - - return self.clients(protocol, 'world', subworld, gender) - - def zclients(self, protocol=None, groups=None, subworld='onethird', gender=None): - """Returns a set of Z-Norm clients for the specific query by the user. - - Keyword Parameters: - - protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ - 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') - 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. - - groups - Ignored. - For the MOBIO database, this has no impact as the Z-Norm clients are coming from - the 'world' set, and are hence the same for both the 'dev' and 'eval' sets. - - subworld - Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled') - Please note that 'onethird' is the default value. - - gender - The gender to consider ('male', 'female') - - Returns: A list containing all the Z-norm clients belonging to the given group. - """ - - return self.clients(protocol, 'world', subworld, gender) - def models(self, protocol=None, groups=None, subworld=None, gender=None): """Returns a set of models for the specific query by the user. Keyword Parameters: protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ + One of the SWAN protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. @@ -245,7 +177,7 @@ class Database(bob.db.base.SQLiteDatabase): Keyword Parameters: protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ + One of the SWAN protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. @@ -266,76 +198,6 @@ class Database(bob.db.base.SQLiteDatabase): return [client.id for client in self.clients(protocol, groups, subworld, gender)] - def tmodels(self, protocol=None, groups=None, subworld='onethird', gender=None): - """Returns a set of T-Norm models for the specific query by the user. - - Keyword Parameters: - - protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ - 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') - 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. - - groups - Ignored. - For the MOBIO database, this has no impact as the T-Norm clients are coming from - the 'world' set, and are hence the same for both the 'dev' and 'eval' sets. - - subworld - Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled') - Please note that 'onethird' is the default value. - - gender - The gender to consider ('male', 'female') - - Returns: A list containing all the T-norm models belonging to the given group. - """ - - protocol = self._replace_protocols_alias(protocol) - protocol = self.check_parameters_for_validity( - protocol, "protocol", self.protocol_names()) - subworld = self.check_parameters_for_validity( - subworld, "subworld", self.subworld_names(), []) - gender = self.check_parameters_for_validity( - gender, "gender", self.genders(), []) - - # List of the clients - q = self.query(TModel).join(Client).join( - Protocol).filter(Protocol.name.in_(protocol)) - if subworld: - q = q.join((Subworld, Client.subworld)).filter( - Subworld.name.in_(subworld)) - if gender: - q = q.filter(Client.gender.in_(gender)) - q = q.order_by(TModel.id) - return list(q) - - def tmodel_ids(self, protocol=None, groups=None, subworld='onethird', gender=None): - """Returns a list of ids of T-Norm models for the specific query by the user. - - Keyword Parameters: - - protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ - 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') - 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. - - groups - Ignored. - For the MOBIO database, this has no impact as the T-Norm clients are coming from - the 'world' set, and are hence the same for both the 'dev' and 'eval' sets. - - subworld - Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled') - Please note that 'onethird' is the default value. - - gender - The gender to consider ('male', 'female') - - Returns: A list containing the ids of all T-norm models belonging to the given group. - """ - return [tmodel.mid for tmodel in self.tmodels(protocol, groups, subworld, gender)] - def get_client_id_from_model_id(self, model_id, **kwargs): """Returns the client_id attached to the given model_id @@ -349,13 +211,13 @@ class Database(bob.db.base.SQLiteDatabase): return model_id def objects(self, protocol=None, purposes=None, model_ids=None, - groups=None, classes=None, subworld=None, gender=None, device=None): + groups=None, gender=None, device=None): """Returns a set of Files for the specific query by the user. Keyword Parameters: protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ + One of the SWAN protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. @@ -375,16 +237,6 @@ class Database(bob.db.base.SQLiteDatabase): If 'None' is given (this is the default), it is considered the same as a tuple with all possible values. - classes - The classes (types of accesses) to be retrieved ('client', 'impostor') - or a tuple with several of them. If 'None' is given (this is the - default), it is considered the same as a tuple with all possible values. - - subworld - Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled') - In order to be considered, "world" should be in groups and only one - split should be specified. - gender The gender to consider ('male', 'female') @@ -394,17 +246,12 @@ class Database(bob.db.base.SQLiteDatabase): Returns: A set of Files with the given properties. """ - protocol = self._replace_protocols_alias(protocol) protocol = self.check_parameters_for_validity( protocol, "protocol", self.protocol_names()) purposes = self.check_parameters_for_validity( purposes, "purpose", self.purposes()) groups = self.check_parameters_for_validity( groups, "group", self.groups()) - classes = self.check_parameters_for_validity( - classes, "class", ('client', 'impostor')) - subworld = self.check_parameters_for_validity( - subworld, "subworld", self.subworld_names(), []) gender = self.check_parameters_for_validity( gender, "gender", self.genders(), []) device = self.check_parameters_for_validity( @@ -422,17 +269,13 @@ class Database(bob.db.base.SQLiteDatabase): q = self.query(File).join(Client).filter(Client.sgroup == 'world').join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup == 'world')) - if subworld: - q = q.join((Subworld, File.subworld)).filter( - Subworld.name.in_(subworld)) if gender: q = q.filter(Client.gender.in_(gender)) if device: q = q.filter(File.device.in_(device)) if model_ids: q = q.filter(File.client_id.in_(model_ids)) - q = q.order_by(File.client_id, File.session_id, - File.speech_type, File.shot_id, File.device) + q = q.order_by(File.client_id, File.session, File.device) retval += list(q) if ('dev' in groups or 'eval' in groups): @@ -446,186 +289,35 @@ class Database(bob.db.base.SQLiteDatabase): q = q.filter(File.device.in_(device)) if model_ids: q = q.filter(Client.id.in_(model_ids)) - q = q.order_by(File.client_id, File.session_id, - File.speech_type, File.shot_id, File.device) + q = q.order_by(File.client_id, File.session, File.device) retval += list(q) if('probe' in purposes): - if('client' in classes): - q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ - filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_( - groups), ProtocolPurpose.purpose == 'probe')) - if gender: - q = q.filter(Client.gender.in_(gender)) - if device: - q = q.filter(File.device.in_(device)) - if model_ids: - q = q.filter(Client.id.in_(model_ids)) - q = q.order_by(File.client_id, File.session_id, - File.speech_type, File.shot_id, File.device) - retval += list(q) - - if('impostor' in classes): - q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ - filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_( - groups), ProtocolPurpose.purpose == 'probe')) - if gender: - q = q.filter(Client.gender.in_(gender)) - if device: - q = q.filter(File.device.in_(device)) - if len(model_ids) == 1: - q = q.filter(not_(File.client_id.in_(model_ids))) - q = q.order_by(File.client_id, File.session_id, - File.speech_type, File.shot_id, File.device) - retval += list(q) - - return list(set(retval)) # To remove duplicates - - def tobjects(self, protocol=None, model_ids=None, groups=None, subworld='onethird', gender=None, speech_type=None, device=None): - """Returns a set of filenames for enrolling T-norm models for score - normalization. - - Keyword Parameters: - - protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ - 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') - 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. - - model_ids - Only retrieves the files for the provided list of model ids. - If 'None' is given (this is the default), no filter over - the model_ids is performed. - - groups - Ignored. - For the MOBIO database, this has no impact as the T-Norm clients are coming from - the 'world' set, and are hence the same for both the 'dev' and 'eval' sets. - - subworld - Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled') - Please note that 'onethird' is the default value. - - gender - The gender to consider ('male', 'female') - - speech_type - The speech type to consider ('p', 'l', 'r', 'f') - - device - The device choice to consider ('mobile', 'laptop') - - Returns: A set of Files with the given properties. - """ - - protocol = self._replace_protocols_alias(protocol) - protocol = self.check_parameters_for_validity( - protocol, "protocol", self.protocol_names()) - subworld = self.check_parameters_for_validity( - subworld, "subworld", self.subworld_names(), []) - gender = self.check_parameters_for_validity( - gender, "gender", self.genders(), []) - - if(model_ids is None): - model_ids = () - elif isinstance(model_ids, six.string_types): - model_ids = (model_ids,) - - # Now query the database - q = self.query(File, Protocol).filter( - Protocol.name.in_(protocol)).join(Client) - if subworld: - q = q.join((Subworld, File.subworld)).filter( - Subworld.name.in_(subworld)) - q = q.join((TModel, File.tmodels)).filter( - TModel.protocol_id == Protocol.id) - if model_ids: - q = q.filter(TModel.mid.in_(model_ids)) - if gender: - q = q.filter(Client.gender.in_(gender)) - if speech_type: - q = q.filter(File.speech_type.in_(speech_type)) - if device: - q = q.filter(File.device.in_(device)) - q = q.order_by(File.client_id, File.session_id, - File.speech_type, File.shot_id, File.device) - retval = [v[0] for v in q] - return list(retval) - - def zobjects(self, protocol=None, model_ids=None, groups=None, subworld='onethird', gender=None, speech_type=['r', 'f'], device=['mobile']): - """Returns a set of Files to perform Z-norm score normalization. - - Keyword Parameters: - - protocol - One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \ - 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female') - 'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively. - - model_ids - Only retrieves the files for the provided list of model ids (claimed - client id). If 'None' is given (this is the default), no filter over - the model_ids is performed. - - groups - Ignored. - For the MOBIO database, this has no impact as the Z-Norm clients are coming from - the 'world' set, and are hence the same for both the 'dev' and 'eval' sets. - - subworld - Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled') - Please note that 'onethird' is the default value. - - gender - The gender to consider ('male', 'female') - - speech_type - The speech type to consider ('p', 'l', 'r', 'f') - - device - The device choice to consider ('mobile', 'laptop') - - Returns: A set of Files with the given properties. - """ - - protocol = self._replace_protocols_alias(protocol) - protocol = self.check_parameters_for_validity( - protocol, "protocol", self.protocol_names()) - groups = self.check_parameters_for_validity( - groups, "group", self.groups()) - subworld = self.check_parameters_for_validity( - subworld, "subworld", self.subworld_names(), []) - gender = self.check_parameters_for_validity( - gender, "gender", self.genders(), []) - speech_type = self.check_parameters_for_validity( - speech_type, "speech_type", File.speech_type_choices) - device = self.check_parameters_for_validity( - device, "device", File.device_choices) + q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ + filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_( + groups), ProtocolPurpose.purpose == 'probe')) + if gender: + q = q.filter(Client.gender.in_(gender)) + if device: + q = q.filter(File.device.in_(device)) + if model_ids: + q = q.filter(Client.id.in_(model_ids)) + q = q.order_by(File.client_id, File.session, File.device) + retval += list(q) - import collections - if(model_ids is None): - model_ids = () - elif not isinstance(model_ids, collections.Iterable): - model_ids = (model_ids,) + q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ + filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_( + groups), ProtocolPurpose.purpose == 'probe')) + if gender: + q = q.filter(Client.gender.in_(gender)) + if device: + q = q.filter(File.device.in_(device)) + if len(model_ids) == 1: + q = q.filter(not_(File.client_id.in_(model_ids))) + q = q.order_by(File.client_id, File.session, File.device) + retval += list(q) - # Now query the database - q = self.query(File).join(Client).filter(Client.sgroup == 'world').join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ - filter(and_(Protocol.name.in_(protocol), - ProtocolPurpose.sgroup == 'world')) - if subworld: - q = q.join((Subworld, File.subworld)).filter( - Subworld.name.in_(subworld)) - if gender: - q = q.filter(Client.gender.in_(gender)) - if speech_type: - q = q.filter(File.speech_type.in_(speech_type)) - if device: - q = q.filter(File.device.in_(device)) - if model_ids: - q = q.filter(File.client_id.in_(model_ids)) - q = q.order_by(File.client_id, File.session_id, - File.speech_type, File.shot_id, File.device) - return list(q) + return list(set(retval)) # To remove duplicates def annotations(self, file): """Reads the annotations for the given file id from file and returns them in a dictionary. @@ -649,36 +341,3 @@ class Database(bob.db.base.SQLiteDatabase): # return the annotations as read from file return bob.db.base.read_annotation_file(annotation_file, 'eyecenter') - - def protocol_names(self): - """Returns all registered protocol names""" - - l = self.protocols() - retval = [str(k.name) for k in l] - return retval - - def protocols(self): - """Returns all registered protocols""" - - return list(self.query(Protocol)) - - def has_protocol(self, name): - """Tells if a certain protocol is available""" - - return self.query(Protocol).filter(Protocol.name == self._replace_protocol_alias(name)).count() != 0 - - def protocol(self, name): - """Returns the protocol object in the database given a certain name. Raises - an error if that does not exist.""" - - return self.query(Protocol).filter(Protocol.name == self._replace_protocol_alias(name)).one() - - def protocol_purposes(self): - """Returns all registered protocol purposes""" - - return list(self.query(ProtocolPurpose)) - - def purposes(self): - """Returns the list of allowed purposes""" - - return ProtocolPurpose.purpose_choices diff --git a/bob/db/swan/script/__init__.py b/bob/db/swan/script/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bob/db/swan/script/generate_filelist.py b/bob/db/swan/script/generate_filelist.py deleted file mode 100644 index 63c9f37..0000000 --- a/bob/db/swan/script/generate_filelist.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : - -"""Prints the version of bob and exits -""" - -def main(): - """Main routine, called by the script that gets the configuration of bob.blitz""" - - import bob.blitz - print (bob.blitz.get_config()) - return 0 - diff --git a/bob/db/swan/test.py b/bob/db/swan/test.py index 963b2c1..082a040 100644 --- a/bob/db/swan/test.py +++ b/bob/db/swan/test.py @@ -3,3 +3,54 @@ """Test Units """ + +from .query import Database + + +def test_idiap0_audio(): + protocol = 'idiap0-audio' + db = Database() + files = db.objects(protocol=protocol, groups='world', purposes='train') + # 20 clients, 8 recordings, (2 devices in session 1 and 1 device in + # sessions 2-6) == like it is 1 device and 7 sessions + assert len(files) == 20 * 8 * 1 * 7, len(files) + assert len(set(f.client.id for f in files)) == 20 + assert len(set(f.nrecording for f in files)) == 8 + assert len(set(f.device for f in files)) == 2 + assert len(set(f.session for f in files)) == 6 + assert set(f.client.institute for f in files) == set(['IDIAP']) + assert all(f.client.orig_id < 25 for f in files) + files = db.objects(protocol=protocol, groups='dev', purposes='enroll') + assert len(files) == 15 * 8 * 1 * 1, len(files) + assert len(set(f.client.id for f in files)) == 15 + assert len(set(f.nrecording for f in files)) == 8 + assert len(set(f.device for f in files)) == 1 + assert all(f.session == 1 for f in files) + assert set(f.client.institute for f in files) == set(['IDIAP']) + assert all(f.client.orig_id >= 25 and f.client.orig_id < 41 for f in files) + files = db.objects(protocol=protocol, groups='dev', purposes='probe') + assert len(files) == 15 * 8 * 1 * 5, len(files) + assert len(set(f.client.id for f in files)) == 15 + assert len(set(f.nrecording for f in files)) == 8 + assert len(set(f.device for f in files)) == 1 + assert len(set(f.session for f in files)) == 5 + assert all(f.session > 1 for f in files) + assert set(f.client.institute for f in files) == set(['IDIAP']) + assert all(f.client.orig_id >= 25 and f.client.orig_id < 41 for f in files) + files = db.objects(protocol=protocol, groups='eval', purposes='enroll') + assert len(files) == 15 * 8 * 1 * 1, len(files) + assert len(set(f.client.id for f in files)) == 15 + assert len(set(f.nrecording for f in files)) == 8 + assert len(set(f.device for f in files)) == 1 + assert all(f.session == 1 for f in files) + assert set(f.client.institute for f in files) == set(['IDIAP']) + assert all(f.client.orig_id >= 41 and f.client.orig_id < 61 for f in files) + files = db.objects(protocol=protocol, groups='eval', purposes='probe') + assert len(files) == 15 * 8 * 1 * 5, len(files) + assert len(set(f.client.id for f in files)) == 15 + assert len(set(f.nrecording for f in files)) == 8 + assert len(set(f.device for f in files)) == 1 + assert len(set(f.session for f in files)) == 5 + assert all(f.session > 1 for f in files) + assert set(f.client.institute for f in files) == set(['IDIAP']) + assert all(f.client.orig_id >= 41 and f.client.orig_id < 61 for f in files) diff --git a/doc/conf.py b/doc/conf.py index 64c4c51..880515a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -232,7 +232,6 @@ autodoc_member_order = 'bysource' autodoc_default_flags = [ 'members', 'undoc-members', - 'inherited-members', 'show-inheritance', ] diff --git a/doc/index.rst b/doc/index.rst index 6d33f00..21ff457 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,17 +1,23 @@ .. vim: set fileencoding=utf-8 : -.. Andre Anjos <andre.anjos@idiap.ch> -.. Mon 13 Aug 2012 12:36:40 CEST .. _bob.db.swan: -===================== - Bob Example Project -===================== +================================== + SWAN Database Access API for Bob +================================== + +To use this database, you may need to download additional files: + +.. code-block:: sh + + $ bob_dbmanage.py swan download --missing Package Documentation --------------------- .. automodule:: bob.db.swan +.. automodule:: bob.db.swan.query +.. automodule:: bob.db.swan.models diff --git a/doc/nitpick-exceptions.txt b/doc/nitpick-exceptions.txt new file mode 100644 index 0000000..5c331f3 --- /dev/null +++ b/doc/nitpick-exceptions.txt @@ -0,0 +1,2 @@ +py:class sqlalchemy.ext.declarative.api.Base +py:exc ValueError diff --git a/requirements.txt b/requirements.txt index 7b607ad..a8c1793 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,19 @@ setuptools -egit+git@gitlab.idiap.ch:bob/bob.extension.git#egg=bob.extension +bob.extension -egit+git@gitlab.idiap.ch:bob/bob.blitz.git#egg=bob.blitz +bob.blitz -egit+git@gitlab.idiap.ch:bob/bob.core.git#egg=bob.core +bob.core -egit+git@gitlab.idiap.ch:bob/bob.io.base.git#egg=bob.io.base +bob.io.base -egit+git@gitlab.idiap.ch:bob/bob.io.image.git#egg=bob.io.image +bob.io.image -egit+git@gitlab.idiap.ch:bob/bob.io.video.git#egg=bob.io.video +bob.io.video -egit+git@gitlab.idiap.ch:bob/bob.db.base.git#egg=bob.db.base +bob.db.base +-egit+git@gitlab.idiap.ch:bob/bob.bio.base.git#egg=bob.bio.base +bob.bio.base +-egit+git@gitlab.idiap.ch:bob/bob.bio.spear.git#egg=bob.bio.spear +bob.bio.spear diff --git a/setup.py b/setup.py index e17ca5d..13d77ec 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setup( version=open("version.txt").read().rstrip(), description='SWAN Database Access API for Bob', - url='', + url='https://gitlab.idiap.ch/bob/bob.db.swan', license='BSD', author='Amir Mohammadi', author_email='amir.mohammadi@idiap.ch', @@ -37,10 +37,6 @@ setup( entry_points={ 'bob.db': ['swan = bob.db.swan.driver:Interface'], - 'console_scripts': [ - 'bob_db_swan_generate_filelist = ' - 'bob.db.swan.script.generate_filelist:main', - ], }, classifiers=[ -- GitLab