From 098da6ea44502cbe0b3ded55a1a892ff1da4c2ed Mon Sep 17 00:00:00 2001
From: Amir MOHAMMADI <amir.mohammadi@idiap.ch>
Date: Wed, 17 May 2017 12:58:37 +0200
Subject: [PATCH] Initial version of bob.db.swan

---
 .gitignore                              |   1 +
 MANIFEST.in                             |   2 +-
 bob/db/swan/create.py                   | 584 +++++++++++++++---------
 bob/db/swan/models.py                   |  29 +-
 bob/db/swan/query.py                    | 503 ++++----------------
 bob/db/swan/script/__init__.py          |   0
 bob/db/swan/script/generate_filelist.py |  13 -
 bob/db/swan/test.py                     |  51 +++
 doc/conf.py                             |   1 -
 doc/index.rst                           |  16 +-
 doc/nitpick-exceptions.txt              |   2 +
 requirements.txt                        |  11 +
 setup.py                                |   6 +-
 13 files changed, 557 insertions(+), 662 deletions(-)
 delete mode 100644 bob/db/swan/script/__init__.py
 delete mode 100644 bob/db/swan/script/generate_filelist.py
 create mode 100644 doc/nitpick-exceptions.txt

diff --git a/.gitignore b/.gitignore
index befa401..3d37733 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ develop-eggs
 sphinx
 dist
 conda
+*.sql3
diff --git a/MANIFEST.in b/MANIFEST.in
index 9d10504..dc02b40 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,2 @@
-include README.rst bootstrap-buildout.py buildout.cfg COPYING version.txt requirements.txt
+include README.rst LICENSE version.txt requirements.txt
 recursive-include doc *.py *.rst *.ico *.png
diff --git a/bob/db/swan/create.py b/bob/db/swan/create.py
index a584d88..577b0d5 100644
--- a/bob/db/swan/create.py
+++ b/bob/db/swan/create.py
@@ -4,9 +4,249 @@
 """This script creates the SWAN database in a single pass.
 """
 
-import os
-
 from .models import Client, File, Protocol, ProtocolPurpose, Base
+from collections import defaultdict
+from glob import glob
+import json
+import os
+from sqlalchemy import and_
+
+SITE_MAPPING = {
+    '1': 'NTNU',
+    '2': 'UIO',
+    '3': 'MPH-FRA',
+    '4': 'IDIAP',
+    '6': 'MPH-IND',
+}
+
+MODALITY_MAPPING = {
+    '1': 'face',
+    '2': 'voice',
+    '3': 'eye',
+    '4': 'finger',
+}
+
+DEVICE_MAPPING = {
+    'p': 'iPhone',
+    't': 'iPad',
+}
+
+SESSION1_DATAFORMAT = '''01 p 1.png:rear,4032x3024
+02 p 1.png:rear,4032x3024
+03 p 1.png:rear,4032x3024
+04 p 1.png:rear,4032x3024
+05 p 1.png:rear,4032x3024
+01 p 1.mp4:rear,1280x720,5s
+02 p 1.mp4:rear,1280x720,5s
+06 p 1.png:front,2576x1932
+07 p 1.png:front,2576x1932
+08 p 1.png:front,2576x1932
+09 p 1.png:front,2576x1932
+10 p 1.png:front,2576x1932
+03 p 1.mp4:front,1280x720,5s
+04 p 1.mp4:front,1280x720,5s
+01 t 1.png:rear,3264x2448
+02 t 1.png:rear,3264x2448
+03 t 1.png:rear,3264x2448
+04 t 1.png:rear,3264x2448
+05 t 1.png:rear,3264x2448
+01 t 1.mp4:rear,1280x720,5s
+02 t 1.mp4:rear,1280x720,5s
+06 t 1.png:front,1280x960
+07 t 1.png:front,1280x960
+08 t 1.png:front,1280x960
+09 t 1.png:front,1280x960
+10 t 1.png:front,1280x960
+03 t 1.mp4:front,1280x720,5s
+04 t 1.mp4:front,1280x720,5s
+01 p 2.mp4:front,1280x720
+02 p 2.mp4:front,1280x720
+03 p 2.mp4:front,1280x720
+04 p 2.mp4:front,1280x720
+05 p 2.mp4:front,1280x720
+06 p 2.mp4:front,1280x720
+07 p 2.mp4:front,1280x720
+08 p 2.mp4:front,1280x720
+01 t 2.mp4:front,1280x720
+02 t 2.mp4:front,1280x720
+03 t 2.mp4:front,1280x720
+04 t 2.mp4:front,1280x720
+05 t 2.mp4:front,1280x720
+06 t 2.mp4:front,1280x720
+07 t 2.mp4:front,1280x720
+08 t 2.mp4:front,1280x720
+01 p 3.png:rear,4032x3024
+02 p 3.png:rear,4032x3024
+03 p 3.png:rear,4032x3024
+04 p 3.png:rear,4032x3024
+05 p 3.png:rear,4032x3024
+01 p 3.mp4:rear,1280x720,5s
+02 p 3.mp4:rear,1280x720,5s
+06 p 3.png:front,2576x1932
+07 p 3.png:front,2576x1932
+08 p 3.png:front,2576x1932
+09 p 3.png:front,2576x1932
+10 p 3.png:front,2576x1932
+03 p 3.mp4:front,1280x720,5s
+04 p 3.mp4:front,1280x720,5s
+01 t 3.png:rear,3264x2448
+02 t 3.png:rear,3264x2448
+03 t 3.png:rear,3264x2448
+04 t 3.png:rear,3264x2448
+05 t 3.png:rear,3264x2448
+01 t 3.mp4:rear,1280x720,5s
+02 t 3.mp4:rear,1280x720,5s
+06 t 3.png:front,1280x960
+07 t 3.png:front,1280x960
+08 t 3.png:front,1280x960
+09 t 3.png:front,1280x960
+10 t 3.png:front,1280x960
+03 t 3.mp4:front,1280x720,5s
+04 t 3.mp4:front,1280x720,5s
+01 p 4.png:rear,4032x3024
+02 p 4.png:rear,4032x3024
+03 p 4.png:rear,4032x3024
+04 p 4.png:rear,4032x3024
+05 p 4.png:rear,4032x3024
+01 p 4.mp4:rear,1280x720,5s
+02 p 4.mp4:rear,1280x720,5s
+06 p 4.png:rear,4032x3024
+07 p 4.png:rear,4032x3024
+08 p 4.png:rear,4032x3024
+09 p 4.png:rear,4032x3024
+10 p 4.png:rear,4032x3024
+03 p 4.mp4:rear,1280x720,5s
+04 p 4.mp4:rear,1280x720,5s
+11 p 4.png:rear,4032x3024
+12 p 4.png:rear,4032x3024
+13 p 4.png:rear,4032x3024
+14 p 4.png:rear,4032x3024
+15 p 4.png:rear,4032x3024
+05 p 4.mp4:rear,1280x720,5s
+06 p 4.mp4:rear,1280x720,5s
+16 p 4.png:rear,4032x3024
+17 p 4.png:rear,4032x3024
+18 p 4.png:rear,4032x3024
+19 p 4.png:rear,4032x3024
+20 p 4.png:rear,4032x3024
+07 p 4.mp4:rear,1280x720,5s
+08 p 4.mp4:rear,1280x720,5s
+01 t 4.png:rear,3264x2448
+02 t 4.png:rear,3264x2448
+03 t 4.png:rear,3264x2448
+04 t 4.png:rear,3264x2448
+05 t 4.png:rear,3264x2448
+01 t 4.mp4:rear,1280x720,5s
+02 t 4.mp4:rear,1280x720,5s
+06 t 4.png:rear,3264x2448
+07 t 4.png:rear,3264x2448
+08 t 4.png:rear,3264x2448
+09 t 4.png:rear,3264x2448
+10 t 4.png:rear,3264x2448
+03 t 4.mp4:rear,1280x720,5s
+04 t 4.mp4:rear,1280x720,5s
+11 t 4.png:rear,3264x2448
+12 t 4.png:rear,3264x2448
+13 t 4.png:rear,3264x2448
+14 t 4.png:rear,3264x2448
+15 t 4.png:rear,3264x2448
+05 t 4.mp4:rear,1280x720,5s
+06 t 4.mp4:rear,1280x720,5s
+16 t 4.png:rear,3264x2448
+17 t 4.png:rear,3264x2448
+18 t 4.png:rear,3264x2448
+19 t 4.png:rear,3264x2448
+20 t 4.png:rear,3264x2448
+07 t 4.mp4:rear,1280x720,5s
+08 t 4.mp4:rear,1280x720,5s
+'''
+
+SESSION2_DATAFORMAT = '''01 p 1.mp4:front,1280x720,5s
+02 p 1.mp4:front,1280x720,5s
+01 p 2.mp4:front,1280x720
+02 p 2.mp4:front,1280x720
+03 p 2.mp4:front,1280x720
+04 p 2.mp4:front,1280x720
+05 p 2.mp4:front,1280x720
+06 p 2.mp4:front,1280x720
+07 p 2.mp4:front,1280x720
+08 p 2.mp4:front,1280x720
+01 p 3.png:rear,4032x3024
+02 p 3.png:rear,4032x3024
+03 p 3.png:rear,4032x3024
+04 p 3.png:rear,4032x3024
+05 p 3.png:rear,4032x3024
+01 p 3.mp4:rear,1280x720,5s
+02 p 3.mp4:rear,1280x720,5s
+06 p 3.png:front,2576x1932
+07 p 3.png:front,2576x1932
+08 p 3.png:front,2576x1932
+09 p 3.png:front,2576x1932
+10 p 3.png:front,2576x1932
+03 p 3.mp4:front,1280x720,5s
+04 p 3.mp4:front,1280x720,5s
+01 p 4.png:rear,4032x3024
+02 p 4.png:rear,4032x3024
+03 p 4.png:rear,4032x3024
+04 p 4.png:rear,4032x3024
+05 p 4.png:rear,4032x3024
+01 p 4.mp4:rear,1280x720,5s
+02 p 4.mp4:rear,1280x720,5s
+06 p 4.png:rear,4032x3024
+07 p 4.png:rear,4032x3024
+08 p 4.png:rear,4032x3024
+09 p 4.png:rear,4032x3024
+10 p 4.png:rear,4032x3024
+03 p 4.mp4:rear,1280x720,5s
+04 p 4.mp4:rear,1280x720,5s
+11 p 4.png:rear,4032x3024
+12 p 4.png:rear,4032x3024
+13 p 4.png:rear,4032x3024
+14 p 4.png:rear,4032x3024
+15 p 4.png:rear,4032x3024
+05 p 4.mp4:rear,1280x720,5s
+06 p 4.mp4:rear,1280x720,5s
+16 p 4.png:rear,4032x3024
+17 p 4.png:rear,4032x3024
+18 p 4.png:rear,4032x3024
+19 p 4.png:rear,4032x3024
+20 p 4.png:rear,4032x3024
+07 p 4.mp4:rear,1280x720,5s
+08 p 4.mp4:rear,1280x720,5s
+'''
+
+KNOWLEDGE = {
+    '01': {'total': 128, 'video': 24, 'image': 40, 'device': ['p', 't']},
+    '02': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']},
+    '03': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']},
+    '04': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']},
+    '05': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']},
+    '06': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']},
+    '07': {'total': 52, 'video': 22, 'image': 30, 'device': ['p']},
+}
+
+
+def parse_data_format(lines):
+    data_format = defaultdict(
+        lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))
+    for line in lines.split('\n'):
+        if not line:
+            continue
+        nrecording, device, biometrics = line.split()
+        biometrics, extension = biometrics.split('.')
+        extension, data = extension.split(':')
+        data = data.split(',')
+        data_format[device][biometrics][extension][nrecording] = data
+    return json.loads(json.dumps(data_format))
+
+
+KNOWLEDGE['01']['data'] = parse_data_format(SESSION1_DATAFORMAT)
+__session2_data_format_loaded = parse_data_format(SESSION2_DATAFORMAT)
+KNOWLEDGE['02']['data'] = __session2_data_format_loaded
+KNOWLEDGE['03']['data'] = __session2_data_format_loaded
+KNOWLEDGE['04']['data'] = __session2_data_format_loaded
+KNOWLEDGE['05']['data'] = __session2_data_format_loaded
+KNOWLEDGE['06']['data'] = __session2_data_format_loaded
 
 
 def nodot(item):
@@ -14,168 +254,127 @@ def nodot(item):
     return item[0] != '.'
 
 
-def add_clients_and_files(sql_session, datadir, extensions, verbose):
+def add_clients_and_files(sql_session, datadir, verbose):
     """Add files to the SWAN database."""
 
-    def add_file(session, datadir, location, client_id_dir, session_device, basename, verbose):
+    client_dict = {}
+
+    def add_file(sql_session, fullpath, verbose):
         """Parse a single filename and add it to the list.
-             Also add a client entry if not already in the database."""
-        v = os.path.splitext(basename)[0].split('_')
-        bname = os.path.splitext(basename)[0]
+        Also add a client entry if not already in the database.
+        Example: IDIAP/session_01/iPhone/00001/4_00001_m_01_01_p_1.mp4
+
+        """
+        basename = os.path.basename(fullpath)
+        site, identity, gender, session, nrecording, device, biometrics = \
+            basename.split('_')
+        biometrics, extension = biometrics.split('.')
+
+        try:
+            camera = KNOWLEDGE[session]['data'][
+                device][biometrics][extension][nrecording]
+        except KeyError:
+            print('File found but not in KNOWLEDGE! {}'.format(fullpath))
+            return
+        camera = camera[0]
+        modality = MODALITY_MAPPING[biometrics]
+        site = SITE_MAPPING[site]
+        device = DEVICE_MAPPING[device]
+
+        if extension == 'mp4':
+            recording = 'video'
+        elif extension == 'png':
+            recording = 'photo'
+        else:
+            raise RuntimeError('Unknown file extension {}'.format(extension))
+
         full_bname = os.path.join(
-            location, client_id_dir, session_device, bname)
+            site, 'session_{}'.format(session), device, identity,
+            basename)
 
-        gender = ''
-        if v[0][0] == 'm':
+        if gender == 'm':
             gender = 'male'
-        if v[0][0] == 'f':
+        elif gender == 'f':
             gender = 'female'
-        institute = int(v[0][1])
-        institute_dir = ''
-        if institute == 0:
-            institute = 'idiap'
-            institute_dir = 'idiap'
-        elif institute == 1:
-            institute = 'manchester'
-            institute_dir = 'uman'
-        elif institute == 2:
-            institute = 'surrey'
-            institute_dir = 'unis'
-        elif institute == 3:
-            institute = 'oulu'
-            institute_dir = 'uoulu'
-        elif institute == 4:
-            institute = 'brno'
-            institute_dir = 'but'
-        elif institute == 5:
-            institute = 'avignon'
-            institute_dir = 'lia'
-        if institute_dir != location:
-            error_msg = "File: %s -- Find location %s in directory of location %s!" % (
-                full_bname, location, institute_dir)
-            raise RuntimeError(error_msg)
-        client_id = v[0][1:4]
-        if v[0][0:4] != client_id_dir:
-            error_msg = "File: %s -- Find identity %s in directory of identity %s!" % (
-                full_bname, v[0][0:4], client_id)
-            raise RuntimeError(error_msg)
-        if not (client_id in client_dict):
-            if (institute == 'surrey' or institute == 'avignon'):
-                group = 'world'
-            elif (institute == 'manchester' or institute == 'oulu'):
-                group = 'dev'
-            elif (institute == 'idiap' or institute == 'brno'):
-                group = 'eval'
+        else:
+            raise RuntimeError('Unknown gender {}'.format(gender))
+
+        # identify the group of this identity
+        identity = int(identity)
+        if identity < 25:
+            group = 'world'
+        elif identity < 41:
+            group = 'dev'
+        elif identity < 61:
+            group = 'eval'
+        else:
+            raise RuntimeError('Unknown IDIAP identity {}'.format(identity))
+
+        # check if the client has been added yet.
+        client_key = (identity, site)
+        if client_key not in client_dict:
             if verbose > 1:
-                print("  Adding client %d..." % int(client_id))
-            session.add(Client(int(client_id), group, gender, institute))
-            client_dict[client_id] = True
-
-        w = session_device.split('_')
-        session_id_from_dir = int(w[0])
-        device_from_dir = w[1]
-
-        session_id = int(v[1])
-        speech_type = v[2][0]
-        shot_id = v[2][1:3]
-        environment = v[3][0]
-        device = v[3][1]
-        if(device == '0'):
-            device = 'mobile'
-        elif(device == '1'):
-            device = 'laptop'
-        if device != device_from_dir:
-            error_msg = "File: %s -- Find device %s in directory of device %s!" % (
-                full_bname, device, device_from_dir)
-            raise RuntimeError(error_msg)
-        if session_id != session_id_from_dir:
-            error_msg = "File: %s -- Find session_id %d in directory of session_id %d!" % (
-                full_bname, session_id, session_id_from_dir)
-            raise RuntimeError(error_msg)
-        channel = int(v[4][0])
-
+                print("    Adding client {}, {}...".format(site, identity))
+            client = Client(identity, group, gender, site)
+            sql_session.add(client)
+            # sql_session.flush()
+            # sql_session.refresh(client)
+            client_dict[client_key] = True
+        else:
+            client = sql_session.query(Client).filter(
+                and_(Client.orig_id == identity,
+                     Client.institute == site)).one()
         if verbose > 1:
             print("    Adding file '%s'..." % full_bname)
-        session.add(File(int(client_id), full_bname, session_id,
-                         speech_type, shot_id, environment, device, channel))
+        sql_session.add(File(full_bname, client, int(session), device,
+                             modality, camera, recording, int(nrecording)))
 
-    client_dict = {}
     if verbose:
         print("Adding clients and files ...")
-    for location in filter(nodot, os.listdir(datadir)):
-        location_dir = os.path.join(datadir, location)
-        if os.path.isdir(location_dir):
-            for client_id in filter(nodot, os.listdir(location_dir)):
-                client_dir = os.path.join(location_dir, client_id)
-                if os.path.isdir(client_dir):
-                    for session_device in filter(nodot, os.listdir(client_dir)):
-                        session_device_dir = os.path.join(
-                            client_dir, session_device)
-                        if os.path.isdir(session_device_dir):
-                            for filename in filter(nodot, os.listdir(session_device_dir)):
-                                for ext in extensions:
-                                    if filename.endswith(ext):
-                                        add_file(sql_session, datadir, location, client_id, session_device, os.path.basename(
-                                            filename), verbose)
+    # files = open('files.txt').read().split()
+    files = []
+    for _, site in SITE_MAPPING.items():
+        files += glob(os.path.join(datadir, site, '*', '*', '*', '*.mp4'))
+        files += glob(os.path.join(datadir, site, '*', '*', '*', '*.png'))
+    for fullpath in files:
+        add_file(sql_session, fullpath, verbose)
 
 
 def add_protocols(session, verbose):
     """Adds protocols"""
 
     # 1. DEFINITIONS
+    mobile_tablet = ('iPhone', 'iPad')
+    mobile = ('iPhone',)
+    ididap_clients = list(range(1, 61))
+    non_idiap_clients = [7, 8, 9, 23, 37, 44, 45, 53, 56, 57]
+    ididap_clients = [x for x in ididap_clients if x not in non_idiap_clients]
+
     # Numbers in the lists correspond to session identifiers
-    protocol_definitions = {}
-
-    # Split male and female clients: list of (client_id, first_session_id) #
-    # few exceptions with 2 as first session
-    clients_male = [(1, 1), (2, 1), (4, 1), (8, 1), (11, 1), (12, 1), (15, 1), (16, 1), (17, 1), (19, 2),
-                    (21, 1), (23, 1), (24, 1), (25, 1), (26,
-                                                         1), (28, 1), (29, 1), (30, 1), (31, 1), (33, 1),
-                    (34, 1), (103, 1), (104, 1), (106, 1), (107,
-                                                            1), (108, 1), (109, 1), (111, 1), (112, 1), (114, 1),
-                    (115, 1), (116, 1), (117, 1), (119, 1), (120,
-                                                             1), (301, 1), (304, 1), (305, 1), (308, 1), (310, 1),
-                    (313, 1), (314, 1), (315, 1), (317, 1), (319,
-                                                             1), (416, 1), (417, 1), (418, 1), (419, 1), (420, 1),
-                    (421, 1), (422, 1), (423, 1), (424, 1), (425,
-                                                             1), (426, 1), (427, 1), (428, 1), (429, 1), (430, 1),
-                    (431, 1), (432, 1)]
-    clients_female = [(7, 2), (9, 1), (10, 1), (22, 1), (32, 1), (118, 1), (122, 1), (123, 1), (125, 1), (126, 1),
-                      (127, 1), (128, 1), (129, 1), (130, 1), (131,
-                                                               1), (133, 1), (302, 1), (303, 1), (306, 1), (307, 1),
-                      (309, 1), (311, 1), (320, 1), (401, 1), (402,
-                                                               1), (403, 1), (404, 1), (405, 2), (406, 1), (407, 1),
-                      (408, 1), (409, 1), (410, 1), (411, 1), (412, 1), (413, 1), (415, 1), (433, 1)]
-    train_mobile = ['mobile']
-    train_all = None
-    enroll_laptop = [['laptop'], ['p']]
-    enroll_mobile = [['mobile'], ['p']]
-    enroll_laptop_mobile = [['laptop', 'mobile'], ['p']]
-    probe = [['mobile'], ['r', 'f']]
-    gender_male = 'male'
-    gender_female = 'female'
-    protocol_definitions['mobile0-male'] = [clients_male,
-                                            train_mobile, enroll_mobile, probe, gender_male]
-    protocol_definitions['mobile0-female'] = [clients_female,
-                                              train_mobile, enroll_mobile, probe, gender_female]
-    protocol_definitions['mobile1-male'] = [clients_male,
-                                            train_all, enroll_mobile, probe, gender_male]
-    protocol_definitions['mobile1-female'] = [clients_female,
-                                              train_all, enroll_mobile, probe, gender_female]
-    protocol_definitions['laptop1-male'] = [clients_male,
-                                            train_all, enroll_laptop, probe, gender_male]
-    protocol_definitions['laptop1-female'] = [clients_female,
-                                              train_all, enroll_laptop, probe, gender_female]
-    protocol_definitions['laptop_mobile1-male'] = [clients_male,
-                                                   train_all, enroll_laptop_mobile, probe, gender_male]
-    protocol_definitions['laptop_mobile1-female'] = [clients_female,
-                                                     train_all, enroll_laptop_mobile, probe, gender_female]
+    protocol_definitions = defaultdict(lambda: defaultdict(dict))
+    protocol_definitions['idiap0-audio']['world']['train'] = \
+        (mobile_tablet, [x for x in ididap_clients if x < 25],
+         ('voice',), ('front',), ('video',), tuple(range(1, 7)), ('IDIAP', ))
+    protocol_definitions['idiap0-audio']['dev']['enroll'] = \
+        (mobile, [x for x in ididap_clients if (x >= 25) and (x < 41)],
+         ('voice',), ('front',), ('video',), tuple(range(1, 2)), ('IDIAP', ))
+    protocol_definitions['idiap0-audio']['dev']['probe'] = \
+        (mobile, [x for x in ididap_clients if (x >= 25) and (x < 41)],
+         ('voice',), ('front',), ('video',), tuple(range(2, 7)), ('IDIAP', ))
+    protocol_definitions['idiap0-audio']['eval']['enroll'] = \
+        (mobile, [x for x in ididap_clients if (x >= 41) and (x < 61)],
+         ('voice',), ('front',), ('video',), tuple(range(1, 2)), ('IDIAP', ))
+    protocol_definitions['idiap0-audio']['eval']['probe'] = \
+        (mobile, [x for x in ididap_clients if (x >= 41) and (x < 61)],
+         ('voice',), ('front',), ('video',), tuple(range(2, 7)), ('IDIAP', ))
 
     # 2. ADDITIONS TO THE SQL DATABASE
-    protocolPurpose_list = [('world', 'train'), ('dev', 'enroll'),
-                            ('dev', 'probe'), ('eval', 'enroll'), ('eval', 'probe')]
+    protocolPurpose_list = [
+        ('world', 'train'), ('dev', 'enroll'), ('dev', 'probe'),
+        ('eval', 'enroll'), ('eval', 'probe')]
+
     for proto in protocol_definitions:
-        p = Protocol(proto, protocol_definitions[proto][4])
+        p = Protocol(proto)
         # Add protocol
         if verbose:
             print("Adding protocol '%s'..." % (proto))
@@ -184,81 +383,48 @@ def add_protocols(session, verbose):
         session.refresh(p)
 
         # Add protocol purposes
-        for key in range(len(protocolPurpose_list)):
-            purpose = protocolPurpose_list[key]
-            pu = ProtocolPurpose(p.id, purpose[0], purpose[1])
+        for group, purpose in protocolPurpose_list:
+            pu = ProtocolPurpose(p.id, group, purpose)
             if verbose > 1:
                 print("  Adding protocol purpose ('%s','%s')..." %
-                      (purpose[0], purpose[1]))
+                      (group, purpose))
             session.add(pu)
             session.flush()
             session.refresh(pu)
 
-            # Add files attached with this protocol purpose
-            client_group = ""
-            device_list = []
-            speech_list = []
-            if(key == 0):
-                client_group = "world"
-            elif(key == 1 or key == 2):
-                client_group = "dev"
-            elif(key == 3 or key == 4):
-                client_group = "eval"
-            if(key == 0):
-                world_list = True
-                session_list_in = False
-                device_list = protocol_definitions[proto][1]
-            if(key == 1 or key == 3):
-                world_list = False
-                session_list_in = True
-                device_list = protocol_definitions[proto][2][0]
-                speech_list = protocol_definitions[proto][2][1]
-            elif(key == 2 or key == 4):
-                world_list = False
-                session_list_in = False
-                device_list = protocol_definitions[proto][3][0]
-                speech_list = protocol_definitions[proto][3][1]
+            # get the list of files for that group
+            q = session.query(File).join(Client).filter(
+                Client.sgroup == group).order_by(File.id)
 
-            # Adds 'protocol' files
-            # World set
-            if world_list:
-                q = session.query(File).join(Client).filter(
-                    Client.sgroup == 'world').order_by(File.id)
-                if device_list:
-                    q = q.filter(File.device.in_(device_list))
-                for k in q:
-                    if verbose > 1:
-                        print("    Adding protocol file '%s'..." % (k.path))
-                    pu.files.append(k)
-            # Dev/eval set
-            else:
-                for client in protocol_definitions[proto][0]:
-                    cid = client[0]  # client id
-                    sid = client[1]  # session id
-                    q = session.query(File).join(Client).\
-                        filter(Client.sgroup == client_group).filter(
-                            Client.id == cid)
-                    if session_list_in:
-                        q = q.filter(File.session_id == sid)
-                    else:
-                        q = q.filter(File.session_id != sid)
-                    if device_list:
-                        q = q.filter(File.device.in_(device_list))
-                    if speech_list:
-                        q = q.filter(File.speech_type.in_(speech_list))
-                    q = q.order_by(File.id)
-                    for k in q:
-                        if verbose > 1:
-                            print("    Adding protocol file '%s'..." %
-                                  (k.path))
-                        pu.files.append(k)
+            device_list = protocol_definitions[proto][group][purpose][0]
+            if device_list:
+                q = q.filter(File.device.in_(device_list))
 
-        # Add protocol
-        speech_type = ['p', 'l', 'r', 'f']
-        mobile_only = False
-        if 'mobile0' in proto:
-            mobile_only = True
-        add_tmodels(session, p.id, mobile_only, speech_type, verbose)
+            modality_list = protocol_definitions[proto][group][purpose][2]
+            if modality_list:
+                q = q.filter(File.modality.in_(modality_list))
+
+            camera_list = protocol_definitions[proto][group][purpose][3]
+            if camera_list:
+                q = q.filter(File.camera.in_(camera_list))
+
+            recording_list = protocol_definitions[proto][group][purpose][4]
+            if recording_list:
+                q = q.filter(File.recording.in_(recording_list))
+
+            session_list = protocol_definitions[proto][group][purpose][5]
+            if session_list:
+                q = q.filter(File.session.in_(session_list))
+
+            institute_list = protocol_definitions[proto][group][purpose][6]
+            if institute_list:
+                q = q.filter(Client.institute.in_(institute_list))
+
+            # Adds 'protocol' files
+            for k in q:
+                if verbose > 1:
+                    print("    Adding protocol file '%s'..." % (k.path))
+                pu.files.append(k)
 
 
 def create_tables(args):
@@ -293,7 +459,7 @@ def create(args):
     # the real work...
     create_tables(args)
     s = session_try_nolock(args.type, args.files[0], echo=(args.verbose > 2))
-    add_files(s, args.datadir, args.extensions, args.verbose)
+    add_clients_and_files(s, args.datadir, args.verbose)
     add_protocols(s, args.verbose)
     s.commit()
     s.close()
diff --git a/bob/db/swan/models.py b/bob/db/swan/models.py
index cb1b5cd..702fc27 100644
--- a/bob/db/swan/models.py
+++ b/bob/db/swan/models.py
@@ -59,26 +59,43 @@ class File(Base, bob.db.base.File):
 
     # Key identifier for the file
     id = Column(Integer, primary_key=True)
-    # Key identifier of the client associated with this file
-    client_id = Column(Integer, ForeignKey('client.id'))  # for SQL
     # Unique path to this file inside the database
     path = Column(String(100), unique=True)
     # Identifier of the session
-    session_id = Column(Integer)
+    session = Column(Integer)
     # Identifier of the device
-    device_choices = ('mobile', 'tablet')
+    device_choices = ('iPhone', 'iPad')
     device = Column(Enum(*device_choices))
 
+    modality_choices = ('face', 'voice', 'eye', 'finger')
+    modality = Column(Enum(*modality_choices))
+
+    camera_choices = ('rear', 'front')
+    camera = Column(Enum(*camera_choices))
+
+    recording_choices = ('video', 'photo')
+    recording = Column(Enum(*recording_choices))
+
+    nrecording = Column(Integer)
+
+    # Key identifier of the client associated with this file
+    client_id = Column(Integer, ForeignKey('client.id'))  # for SQL
     # For Python: A direct link to the client object that this file belongs to
     client = relationship("Client", backref=backref("files", order_by=id))
 
-    def __init__(self, client_id, path, session_id, device):
+    def __init__(self, path, client, session, device, modality,
+                 camera, recording, nrecording):
         # call base class constructor
         bob.db.base.File.__init__(self, path=path)
 
         # fill the remaining bits of the file information
-        self.client_id = client_id
+        self.client = client
+        self.session = session
         self.device = device
+        self.modality = modality
+        self.camera = camera
+        self.recording = recording
+        self.nrecording = nrecording
 
 
 class Protocol(Base):
diff --git a/bob/db/swan/query.py b/bob/db/swan/query.py
index 00b11d4..e8876c8 100644
--- a/bob/db/swan/query.py
+++ b/bob/db/swan/query.py
@@ -2,29 +2,30 @@
 # vim: set fileencoding=utf-8 :
 
 """This module provides the Dataset interface allowing the user to query the
-MOBIO database in the most obvious ways.
+SWAN database in the most obvious ways.
 """
 
-import six
-from .models import *
 from .driver import Interface
+from .models import File, Client, Protocol, ProtocolPurpose
 from sqlalchemy import and_, not_
-
 import bob.db.base
 
 SQLITE_FILE = Interface().files()[0]
 
 
 class Database(bob.db.base.SQLiteDatabase):
-    """The dataset class opens and maintains a connection opened to the Database.
+    """The dataset class opens and maintains an open connection to the
+    Database.
 
-    It provides many different ways to probe for the characteristics of the data
-    and for the data itself inside the database.
+    It provides many different ways to probe for the characteristics of the
+    data and for the data itself inside the database.
     """
 
-    def __init__(self, original_directory=None, original_extension=None, annotation_directory=None, annotation_extension='.pos'):
+    def __init__(self, original_directory=None, original_extension=None,
+                 annotation_directory=None, annotation_extension='.pos'):
         # call base class constructors to open a session to the database
-        super(Database, self).__init__(SQLITE_FILE, File)
+        super(Database, self).__init__(
+            SQLITE_FILE, File, original_directory, original_extension)
 
         self.annotation_directory = annotation_directory
         self.annotation_extension = annotation_extension
@@ -32,85 +33,73 @@ class Database(bob.db.base.SQLiteDatabase):
     def groups(self, protocol=None):
         """Returns the names of all registered groups"""
 
-        return ProtocolPurpose.group_choices
+        return Client.group_choices
 
     def genders(self):
         """Returns the list of genders"""
 
         return Client.gender_choices
 
-    def subworld_names(self):
-        """Returns all registered subworld names"""
+    def institutes(self):
+        """Returns the list of institutes"""
 
-        self.assert_validity()
-        l = self.subworlds()
-        retval = [str(k.name) for k in l]
+        return Client.institute_choices
+
+    def protocol_names(self):
+        """Returns all registered protocol names"""
+
+        retval = [str(k.name) for k in self.protocols()]
         return retval
 
-    def subworlds(self):
-        """Returns the list of subworlds"""
+    def protocols(self):
+        """Returns all registered protocols"""
 
-        return list(self.query(Subworld))
+        return list(self.query(Protocol))
 
-    def has_subworld(self, name):
-        """Tells if a certain subworld is available"""
+    def has_protocol(self, name):
+        """Tells if a certain protocol is available"""
 
-        self.assert_validity()
-        return self.query(Subworld).filter(Subworld.name == name).count() != 0
-
-    def _replace_protocol_alias(self, protocol):
-        if protocol == 'male':
-            return 'mobile0-male'
-        elif protocol == 'female':
-            return 'mobile0-female'
-        else:
-            return protocol
-
-    def _replace_protocols_alias(self, protocol):
-        # print(protocol)
-        if protocol:
-            from six import string_types
-            if isinstance(protocol, string_types):
-                # print([self._replace_protocol_alias(protocol)])
-                return [self._replace_protocol_alias(protocol)]
-            else:
-                # print(list(set(self._replace_protocol_alias(k) for k in protocols)))
-                return list(set(self._replace_protocol_alias(k) for k in protocols))
-        else:
-            return None
+        return self.query(Protocol).filter(Protocol.name == name).count() != 0
 
-    def clients(self, protocol=None, groups=None, subworld=None, gender=None):
-        """Returns a list of Clients for the specific query by the user.
+    def protocol(self, name):
+        """Returns the protocol object in the database given a certain name.
+        Raises an error if no such protocol exists."""
 
-        Keyword Parameters:
+        return self.query(Protocol).filter(Protocol.name == name).one()
 
-        protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
-                'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
-            'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
+    def protocol_purposes(self):
+        """Returns all registered protocol purposes"""
 
-        groups
-            The groups to which the clients belong ('dev', 'eval', 'world')
-            Please note that world data are protocol/gender independent
+        return list(self.query(ProtocolPurpose))
 
-        subworld
-            Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled')
-            In order to be considered, 'world' should be in groups and only one
-            split should be specified.
+    def purposes(self):
+        """Returns the list of allowed purposes"""
 
-        gender
+        return ProtocolPurpose.purpose_choices
+
+    def clients(self, protocol=None, groups=None, gender=None, institute=None):
+        """Returns a list of Clients for the specific query by the user.
+
+        Parameters
+        ----------
+        protocol : :py:obj:`str` or :py:obj:`None`, optional
+            One of the SWAN protocols. See :py:meth:`protocol_names`
+        groups : :py:obj:`str` or :py:obj:`None`, optional
+            The groups to which the clients belong ('dev', 'eval', 'world')
+        gender : :py:obj:`str` or :py:obj:`None`, optional
             The gender to consider ('male', 'female')
+        institute : :py:obj:`str` or :py:obj:`None`, optional
+            The institute of the client. See :py:meth:`institutes`.
 
-        Returns: A list containing all the clients which have the given properties.
+        Returns
+        -------
+        list of :py:class:`bob.db.swan.models.Client`
+            A list containing all the clients which have the given properties.
         """
-
-        protocol = self._replace_protocols_alias(protocol)
         protocol = self.check_parameters_for_validity(
             protocol, "protocol", self.protocol_names(), [])
         groups = self.check_parameters_for_validity(
             groups, "group", self.groups(), self.groups())
-        subworld = self.check_parameters_for_validity(
-            subworld, "subworld", self.subworld_names(), [])
         gender = self.check_parameters_for_validity(
             gender, "gender", self.genders(), [])
 
@@ -118,9 +107,6 @@ class Database(bob.db.base.SQLiteDatabase):
         retval = []
         if 'world' in groups:
             q = self.query(Client).filter(Client.sgroup == 'world')
-            if subworld:
-                q = q.join((Subworld, Client.subworld)).filter(
-                    Subworld.name.in_(subworld))
             if gender:
                 q = q.filter(Client.gender.in_(gender))
             q = q.order_by(Client.id)
@@ -158,67 +144,13 @@ class Database(bob.db.base.SQLiteDatabase):
 
         return self.query(Client).filter(Client.id == id).one()
 
-    def tclients(self, protocol=None, groups=None, subworld='onethird', gender=None):
-        """Returns a set of T-Norm clients for the specific query by the user.
-
-        Keyword Parameters:
-
-        protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
-                'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
-            'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
-
-        groups
-            Ignored.
-            For the MOBIO database, this has no impact as the T-Norm clients are coming from
-            the 'world' set, and are hence the same for both the 'dev' and 'eval' sets.
-
-        subworld
-            Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled')
-            Please note that 'onethird' is the default value.
-
-        gender
-            The gender to consider ('male', 'female')
-
-        Returns: A list containing all the T-norm clients belonging to the given group.
-        """
-
-        return self.clients(protocol, 'world', subworld, gender)
-
-    def zclients(self, protocol=None, groups=None, subworld='onethird', gender=None):
-        """Returns a set of Z-Norm clients for the specific query by the user.
-
-        Keyword Parameters:
-
-        protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
-                'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
-            'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
-
-        groups
-            Ignored.
-            For the MOBIO database, this has no impact as the Z-Norm clients are coming from
-            the 'world' set, and are hence the same for both the 'dev' and 'eval' sets.
-
-        subworld
-            Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled')
-            Please note that 'onethird' is the default value.
-
-        gender
-            The gender to consider ('male', 'female')
-
-        Returns: A list containing all the Z-norm clients belonging to the given group.
-        """
-
-        return self.clients(protocol, 'world', subworld, gender)
-
     def models(self, protocol=None, groups=None, subworld=None, gender=None):
         """Returns a set of models for the specific query by the user.
 
         Keyword Parameters:
 
         protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
+            One of the SWAN protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
                 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
             'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
 
@@ -245,7 +177,7 @@ class Database(bob.db.base.SQLiteDatabase):
         Keyword Parameters:
 
         protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
+            One of the SWAN protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
                 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
             'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
 
@@ -266,76 +198,6 @@ class Database(bob.db.base.SQLiteDatabase):
 
         return [client.id for client in self.clients(protocol, groups, subworld, gender)]
 
-    def tmodels(self, protocol=None, groups=None, subworld='onethird', gender=None):
-        """Returns a set of T-Norm models for the specific query by the user.
-
-        Keyword Parameters:
-
-        protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
-                'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
-            'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
-
-        groups
-            Ignored.
-            For the MOBIO database, this has no impact as the T-Norm clients are coming from
-            the 'world' set, and are hence the same for both the 'dev' and 'eval' sets.
-
-        subworld
-            Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled')
-            Please note that 'onethird' is the default value.
-
-        gender
-            The gender to consider ('male', 'female')
-
-        Returns: A list containing all the T-norm models belonging to the given group.
-        """
-
-        protocol = self._replace_protocols_alias(protocol)
-        protocol = self.check_parameters_for_validity(
-            protocol, "protocol", self.protocol_names())
-        subworld = self.check_parameters_for_validity(
-            subworld, "subworld", self.subworld_names(), [])
-        gender = self.check_parameters_for_validity(
-            gender, "gender", self.genders(), [])
-
-        # List of the clients
-        q = self.query(TModel).join(Client).join(
-            Protocol).filter(Protocol.name.in_(protocol))
-        if subworld:
-            q = q.join((Subworld, Client.subworld)).filter(
-                Subworld.name.in_(subworld))
-        if gender:
-            q = q.filter(Client.gender.in_(gender))
-        q = q.order_by(TModel.id)
-        return list(q)
-
-    def tmodel_ids(self, protocol=None, groups=None, subworld='onethird', gender=None):
-        """Returns a list of ids of T-Norm models for the specific query by the user.
-
-        Keyword Parameters:
-
-        protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
-                'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
-            'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
-
-        groups
-            Ignored.
-            For the MOBIO database, this has no impact as the T-Norm clients are coming from
-            the 'world' set, and are hence the same for both the 'dev' and 'eval' sets.
-
-        subworld
-            Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled')
-            Please note that 'onethird' is the default value.
-
-        gender
-            The gender to consider ('male', 'female')
-
-        Returns: A list containing the ids of all T-norm models belonging to the given group.
-        """
-        return [tmodel.mid for tmodel in self.tmodels(protocol, groups, subworld, gender)]
-
     def get_client_id_from_model_id(self, model_id, **kwargs):
         """Returns the client_id attached to the given model_id
 
@@ -349,13 +211,13 @@ class Database(bob.db.base.SQLiteDatabase):
         return model_id
 
     def objects(self, protocol=None, purposes=None, model_ids=None,
-                groups=None, classes=None, subworld=None, gender=None, device=None):
+                groups=None, gender=None, device=None):
         """Returns a set of Files for the specific query by the user.
 
         Keyword Parameters:
 
         protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
+            One of the SWAN protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
                 'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
             'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
 
@@ -375,16 +237,6 @@ class Database(bob.db.base.SQLiteDatabase):
             If 'None' is given (this is the default), it is considered the same as a
             tuple with all possible values.
 
-        classes
-            The classes (types of accesses) to be retrieved ('client', 'impostor')
-            or a tuple with several of them. If 'None' is given (this is the
-            default), it is considered the same as a tuple with all possible values.
-
-        subworld
-            Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled')
-            In order to be considered, "world" should be in groups and only one
-            split should be specified.
-
         gender
             The gender to consider ('male', 'female')
 
@@ -394,17 +246,12 @@ class Database(bob.db.base.SQLiteDatabase):
         Returns: A set of Files with the given properties.
         """
 
-        protocol = self._replace_protocols_alias(protocol)
         protocol = self.check_parameters_for_validity(
             protocol, "protocol", self.protocol_names())
         purposes = self.check_parameters_for_validity(
             purposes, "purpose", self.purposes())
         groups = self.check_parameters_for_validity(
             groups, "group", self.groups())
-        classes = self.check_parameters_for_validity(
-            classes, "class", ('client', 'impostor'))
-        subworld = self.check_parameters_for_validity(
-            subworld, "subworld", self.subworld_names(), [])
         gender = self.check_parameters_for_validity(
             gender, "gender", self.genders(), [])
         device = self.check_parameters_for_validity(
@@ -422,17 +269,13 @@ class Database(bob.db.base.SQLiteDatabase):
             q = self.query(File).join(Client).filter(Client.sgroup == 'world').join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
                 filter(and_(Protocol.name.in_(protocol),
                             ProtocolPurpose.sgroup == 'world'))
-            if subworld:
-                q = q.join((Subworld, File.subworld)).filter(
-                    Subworld.name.in_(subworld))
             if gender:
                 q = q.filter(Client.gender.in_(gender))
             if device:
                 q = q.filter(File.device.in_(device))
             if model_ids:
                 q = q.filter(File.client_id.in_(model_ids))
-            q = q.order_by(File.client_id, File.session_id,
-                           File.speech_type, File.shot_id, File.device)
+            q = q.order_by(File.client_id, File.session, File.device)
             retval += list(q)
 
         if ('dev' in groups or 'eval' in groups):
@@ -446,186 +289,35 @@ class Database(bob.db.base.SQLiteDatabase):
                     q = q.filter(File.device.in_(device))
                 if model_ids:
                     q = q.filter(Client.id.in_(model_ids))
-                q = q.order_by(File.client_id, File.session_id,
-                               File.speech_type, File.shot_id, File.device)
+                q = q.order_by(File.client_id, File.session, File.device)
                 retval += list(q)
 
             if('probe' in purposes):
-                if('client' in classes):
-                    q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
-                        filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_(
-                            groups), ProtocolPurpose.purpose == 'probe'))
-                    if gender:
-                        q = q.filter(Client.gender.in_(gender))
-                    if device:
-                        q = q.filter(File.device.in_(device))
-                    if model_ids:
-                        q = q.filter(Client.id.in_(model_ids))
-                    q = q.order_by(File.client_id, File.session_id,
-                                   File.speech_type, File.shot_id, File.device)
-                    retval += list(q)
-
-                if('impostor' in classes):
-                    q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
-                        filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_(
-                            groups), ProtocolPurpose.purpose == 'probe'))
-                    if gender:
-                        q = q.filter(Client.gender.in_(gender))
-                    if device:
-                        q = q.filter(File.device.in_(device))
-                    if len(model_ids) == 1:
-                        q = q.filter(not_(File.client_id.in_(model_ids)))
-                    q = q.order_by(File.client_id, File.session_id,
-                                   File.speech_type, File.shot_id, File.device)
-                    retval += list(q)
-
-        return list(set(retval))  # To remove duplicates
-
-    def tobjects(self, protocol=None, model_ids=None, groups=None, subworld='onethird', gender=None, speech_type=None, device=None):
-        """Returns a set of filenames for enrolling T-norm models for score
-             normalization.
-
-        Keyword Parameters:
-
-        protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
-                'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
-            'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
-
-        model_ids
-            Only retrieves the files for the provided list of model ids.
-            If 'None' is given (this is the default), no filter over
-            the model_ids is performed.
-
-        groups
-            Ignored.
-            For the MOBIO database, this has no impact as the T-Norm clients are coming from
-            the 'world' set, and are hence the same for both the 'dev' and 'eval' sets.
-
-        subworld
-            Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled')
-            Please note that 'onethird' is the default value.
-
-        gender
-            The gender to consider ('male', 'female')
-
-        speech_type
-            The speech type to consider ('p', 'l', 'r', 'f')
-
-        device
-            The device choice to consider ('mobile', 'laptop')
-
-        Returns: A set of Files with the given properties.
-        """
-
-        protocol = self._replace_protocols_alias(protocol)
-        protocol = self.check_parameters_for_validity(
-            protocol, "protocol", self.protocol_names())
-        subworld = self.check_parameters_for_validity(
-            subworld, "subworld", self.subworld_names(), [])
-        gender = self.check_parameters_for_validity(
-            gender, "gender", self.genders(), [])
-
-        if(model_ids is None):
-            model_ids = ()
-        elif isinstance(model_ids, six.string_types):
-            model_ids = (model_ids,)
-
-        # Now query the database
-        q = self.query(File, Protocol).filter(
-            Protocol.name.in_(protocol)).join(Client)
-        if subworld:
-            q = q.join((Subworld, File.subworld)).filter(
-                Subworld.name.in_(subworld))
-        q = q.join((TModel, File.tmodels)).filter(
-            TModel.protocol_id == Protocol.id)
-        if model_ids:
-            q = q.filter(TModel.mid.in_(model_ids))
-        if gender:
-            q = q.filter(Client.gender.in_(gender))
-        if speech_type:
-            q = q.filter(File.speech_type.in_(speech_type))
-        if device:
-            q = q.filter(File.device.in_(device))
-        q = q.order_by(File.client_id, File.session_id,
-                       File.speech_type, File.shot_id, File.device)
-        retval = [v[0] for v in q]
-        return list(retval)
-
-    def zobjects(self, protocol=None, model_ids=None, groups=None, subworld='onethird', gender=None, speech_type=['r', 'f'], device=['mobile']):
-        """Returns a set of Files to perform Z-norm score normalization.
-
-        Keyword Parameters:
-
-        protocol
-            One of the MOBIO protocols ('mobile0-male', 'mobile0-female', 'mobile1-male', 'mobile1-female', \
-                'laptop1-male', 'laptop1-female', 'laptop_mobile1-male', 'laptop_mobile1-female')
-            'male'and 'female' are aliases for 'mobile0-male' and 'mobile0-female', respectively.
-
-        model_ids
-            Only retrieves the files for the provided list of model ids (claimed
-            client id).  If 'None' is given (this is the default), no filter over
-            the model_ids is performed.
-
-        groups
-            Ignored.
-            For the MOBIO database, this has no impact as the Z-Norm clients are coming from
-            the 'world' set, and are hence the same for both the 'dev' and 'eval' sets.
-
-        subworld
-            Specify a split of the world data ('onethird', 'twothirds', 'twothirds-subsampled')
-            Please note that 'onethird' is the default value.
-
-        gender
-            The gender to consider ('male', 'female')
-
-        speech_type
-            The speech type to consider ('p', 'l', 'r', 'f')
-
-        device
-            The device choice to consider ('mobile', 'laptop')
-
-        Returns: A set of Files with the given properties.
-        """
-
-        protocol = self._replace_protocols_alias(protocol)
-        protocol = self.check_parameters_for_validity(
-            protocol, "protocol", self.protocol_names())
-        groups = self.check_parameters_for_validity(
-            groups, "group", self.groups())
-        subworld = self.check_parameters_for_validity(
-            subworld, "subworld", self.subworld_names(), [])
-        gender = self.check_parameters_for_validity(
-            gender, "gender", self.genders(), [])
-        speech_type = self.check_parameters_for_validity(
-            speech_type, "speech_type", File.speech_type_choices)
-        device = self.check_parameters_for_validity(
-            device, "device", File.device_choices)
+                q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
+                    filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_(
+                        groups), ProtocolPurpose.purpose == 'probe'))
+                if gender:
+                    q = q.filter(Client.gender.in_(gender))
+                if device:
+                    q = q.filter(File.device.in_(device))
+                if model_ids:
+                    q = q.filter(Client.id.in_(model_ids))
+                q = q.order_by(File.client_id, File.session, File.device)
+                retval += list(q)
 
-        import collections
-        if(model_ids is None):
-            model_ids = ()
-        elif not isinstance(model_ids, collections.Iterable):
-            model_ids = (model_ids,)
+                q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
+                    filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_(
+                        groups), ProtocolPurpose.purpose == 'probe'))
+                if gender:
+                    q = q.filter(Client.gender.in_(gender))
+                if device:
+                    q = q.filter(File.device.in_(device))
+                if len(model_ids) == 1:
+                    q = q.filter(not_(File.client_id.in_(model_ids)))
+                q = q.order_by(File.client_id, File.session, File.device)
+                retval += list(q)
 
-        # Now query the database
-        q = self.query(File).join(Client).filter(Client.sgroup == 'world').join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
-            filter(and_(Protocol.name.in_(protocol),
-                        ProtocolPurpose.sgroup == 'world'))
-        if subworld:
-            q = q.join((Subworld, File.subworld)).filter(
-                Subworld.name.in_(subworld))
-        if gender:
-            q = q.filter(Client.gender.in_(gender))
-        if speech_type:
-            q = q.filter(File.speech_type.in_(speech_type))
-        if device:
-            q = q.filter(File.device.in_(device))
-        if model_ids:
-            q = q.filter(File.client_id.in_(model_ids))
-        q = q.order_by(File.client_id, File.session_id,
-                       File.speech_type, File.shot_id, File.device)
-        return list(q)
+        return list(set(retval))  # To remove duplicates
 
     def annotations(self, file):
         """Reads the annotations for the given file id from file and returns them in a dictionary.
@@ -649,36 +341,3 @@ class Database(bob.db.base.SQLiteDatabase):
 
         # return the annotations as read from file
         return bob.db.base.read_annotation_file(annotation_file, 'eyecenter')
-
-    def protocol_names(self):
-        """Returns all registered protocol names"""
-
-        l = self.protocols()
-        retval = [str(k.name) for k in l]
-        return retval
-
-    def protocols(self):
-        """Returns all registered protocols"""
-
-        return list(self.query(Protocol))
-
-    def has_protocol(self, name):
-        """Tells if a certain protocol is available"""
-
-        return self.query(Protocol).filter(Protocol.name == self._replace_protocol_alias(name)).count() != 0
-
-    def protocol(self, name):
-        """Returns the protocol object in the database given a certain name. Raises
-        an error if that does not exist."""
-
-        return self.query(Protocol).filter(Protocol.name == self._replace_protocol_alias(name)).one()
-
-    def protocol_purposes(self):
-        """Returns all registered protocol purposes"""
-
-        return list(self.query(ProtocolPurpose))
-
-    def purposes(self):
-        """Returns the list of allowed purposes"""
-
-        return ProtocolPurpose.purpose_choices
diff --git a/bob/db/swan/script/__init__.py b/bob/db/swan/script/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/bob/db/swan/script/generate_filelist.py b/bob/db/swan/script/generate_filelist.py
deleted file mode 100644
index 63c9f37..0000000
--- a/bob/db/swan/script/generate_filelist.py
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-
-"""Prints the version of bob and exits
-"""
-
-def main():
-  """Main routine, called by the script that gets the configuration of bob.blitz"""
-
-  import bob.blitz
-  print (bob.blitz.get_config())
-  return 0
-
diff --git a/bob/db/swan/test.py b/bob/db/swan/test.py
index 963b2c1..082a040 100644
--- a/bob/db/swan/test.py
+++ b/bob/db/swan/test.py
@@ -3,3 +3,54 @@
 
 """Test Units
 """
+
+from .query import Database
+
+
+def test_idiap0_audio():
+    protocol = 'idiap0-audio'
+    db = Database()
+    files = db.objects(protocol=protocol, groups='world', purposes='train')
+    # 20 clients x 8 recordings; session 1 uses 2 devices and sessions 2-6
+    # use 1 device each, which counts like 1 device over 7 sessions
+    assert len(files) == 20 * 8 * 1 * 7, len(files)
+    assert len(set(f.client.id for f in files)) == 20
+    assert len(set(f.nrecording for f in files)) == 8
+    assert len(set(f.device for f in files)) == 2
+    assert len(set(f.session for f in files)) == 6
+    assert set(f.client.institute for f in files) == set(['IDIAP'])
+    assert all(f.client.orig_id < 25 for f in files)
+    files = db.objects(protocol=protocol, groups='dev', purposes='enroll')
+    assert len(files) == 15 * 8 * 1 * 1, len(files)
+    assert len(set(f.client.id for f in files)) == 15
+    assert len(set(f.nrecording for f in files)) == 8
+    assert len(set(f.device for f in files)) == 1
+    assert all(f.session == 1 for f in files)
+    assert set(f.client.institute for f in files) == set(['IDIAP'])
+    assert all(f.client.orig_id >= 25 and f.client.orig_id < 41 for f in files)
+    files = db.objects(protocol=protocol, groups='dev', purposes='probe')
+    assert len(files) == 15 * 8 * 1 * 5, len(files)
+    assert len(set(f.client.id for f in files)) == 15
+    assert len(set(f.nrecording for f in files)) == 8
+    assert len(set(f.device for f in files)) == 1
+    assert len(set(f.session for f in files)) == 5
+    assert all(f.session > 1 for f in files)
+    assert set(f.client.institute for f in files) == set(['IDIAP'])
+    assert all(f.client.orig_id >= 25 and f.client.orig_id < 41 for f in files)
+    files = db.objects(protocol=protocol, groups='eval', purposes='enroll')
+    assert len(files) == 15 * 8 * 1 * 1, len(files)
+    assert len(set(f.client.id for f in files)) == 15
+    assert len(set(f.nrecording for f in files)) == 8
+    assert len(set(f.device for f in files)) == 1
+    assert all(f.session == 1 for f in files)
+    assert set(f.client.institute for f in files) == set(['IDIAP'])
+    assert all(f.client.orig_id >= 41 and f.client.orig_id < 61 for f in files)
+    files = db.objects(protocol=protocol, groups='eval', purposes='probe')
+    assert len(files) == 15 * 8 * 1 * 5, len(files)
+    assert len(set(f.client.id for f in files)) == 15
+    assert len(set(f.nrecording for f in files)) == 8
+    assert len(set(f.device for f in files)) == 1
+    assert len(set(f.session for f in files)) == 5
+    assert all(f.session > 1 for f in files)
+    assert set(f.client.institute for f in files) == set(['IDIAP'])
+    assert all(f.client.orig_id >= 41 and f.client.orig_id < 61 for f in files)
diff --git a/doc/conf.py b/doc/conf.py
index 64c4c51..880515a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -232,7 +232,6 @@ autodoc_member_order = 'bysource'
 autodoc_default_flags = [
   'members',
   'undoc-members',
-  'inherited-members',
   'show-inheritance',
   ]
 
diff --git a/doc/index.rst b/doc/index.rst
index 6d33f00..21ff457 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,17 +1,23 @@
 .. vim: set fileencoding=utf-8 :
-.. Andre Anjos <andre.anjos@idiap.ch>
-.. Mon 13 Aug 2012 12:36:40 CEST
 
 .. _bob.db.swan:
 
-=====================
- Bob Example Project
-=====================
+==================================
+ SWAN Database Access API for Bob
+==================================
+
+To use this database, you may need to download additional files:
+
+.. code-block:: sh
+
+	$ bob_dbmanage.py swan download --missing
 
 
 Package Documentation
 ---------------------
 
 .. automodule:: bob.db.swan
+.. automodule:: bob.db.swan.query
+.. automodule:: bob.db.swan.models
 
 
diff --git a/doc/nitpick-exceptions.txt b/doc/nitpick-exceptions.txt
new file mode 100644
index 0000000..5c331f3
--- /dev/null
+++ b/doc/nitpick-exceptions.txt
@@ -0,0 +1,2 @@
+py:class sqlalchemy.ext.declarative.api.Base
+py:exc ValueError
diff --git a/requirements.txt b/requirements.txt
index 7b607ad..a8c1793 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,19 @@
 setuptools
 -egit+git@gitlab.idiap.ch:bob/bob.extension.git#egg=bob.extension
+bob.extension
 -egit+git@gitlab.idiap.ch:bob/bob.blitz.git#egg=bob.blitz
+bob.blitz
 -egit+git@gitlab.idiap.ch:bob/bob.core.git#egg=bob.core
+bob.core
 -egit+git@gitlab.idiap.ch:bob/bob.io.base.git#egg=bob.io.base
+bob.io.base
 -egit+git@gitlab.idiap.ch:bob/bob.io.image.git#egg=bob.io.image
+bob.io.image
 -egit+git@gitlab.idiap.ch:bob/bob.io.video.git#egg=bob.io.video
+bob.io.video
 -egit+git@gitlab.idiap.ch:bob/bob.db.base.git#egg=bob.db.base
+bob.db.base
+-egit+git@gitlab.idiap.ch:bob/bob.bio.base.git#egg=bob.bio.base
+bob.bio.base
+-egit+git@gitlab.idiap.ch:bob/bob.bio.spear.git#egg=bob.bio.spear
+bob.bio.spear
diff --git a/setup.py b/setup.py
index e17ca5d..13d77ec 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@ setup(
     version=open("version.txt").read().rstrip(),
     description='SWAN Database Access API for Bob',
 
-    url='',
+    url='https://gitlab.idiap.ch/bob/bob.db.swan',
     license='BSD',
     author='Amir Mohammadi',
     author_email='amir.mohammadi@idiap.ch',
@@ -37,10 +37,6 @@ setup(
     entry_points={
 
         'bob.db': ['swan = bob.db.swan.driver:Interface'],
-        'console_scripts': [
-            'bob_db_swan_generate_filelist = '
-            'bob.db.swan.script.generate_filelist:main',
-        ],
     },
 
     classifiers=[
-- 
GitLab