query.py 25 KB
Newer Older
1
2
3
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19
20
21
22

"""This module provides the Dataset interface allowing the user to query the
MOBIO database in the most obvious ways.
"""

23
import six
24
25
from .models import *
from .driver import Interface
26
from sqlalchemy import and_, not_
27

28
import bob.db.base
29

30
# File handed to the ``SQLiteDatabase`` constructor below: the first entry
# returned by the driver interface's ``files()``.  Evaluated once, when this
# module is imported.
SQLITE_FILE = Interface().files()[0]
31

32
33

class Database(bob.db.base.SQLiteDatabase):
34
35
36
37
38
39
  """The dataset class opens and maintains a connection opened to the Database.

  It provides many different ways to probe for the characteristics of the data
  and for the data itself inside the database.
  """

40
  def __init__(self, original_directory=None, original_extension=None, annotation_directory=None, annotation_extension='.pos'):
    """Opens the database session and remembers where the annotations live.

    Keyword Parameters:

    original_directory
      Currently not used by this constructor.

    original_extension
      Currently not used by this constructor.

    annotation_directory
      The directory containing the annotation files. When it is None,
      ``annotations()`` returns None.

    annotation_extension
      The file name extension of the annotation files ('.pos' by default).
    """
    # NOTE(review): original_directory/original_extension are neither stored
    # nor forwarded to the base class -- confirm whether this is intended.

    # the base class constructor opens the session to the database
    bob.db.base.SQLiteDatabase.__init__(self, SQLITE_FILE, File)

    # kept for later use by ``annotations()``
    self.annotation_extension = annotation_extension
    self.annotation_directory = annotation_directory
46

Laurent EL SHAFEY's avatar
Laurent EL SHAFEY committed
47
  def groups(self, protocol=None):
    """Returns the names of all registered groups.

    Keyword Parameters:

    protocol
      Not used; the returned names do not depend on it.

    Returns: The group choices declared on ``ProtocolPurpose``.
    """
    group_names = ProtocolPurpose.group_choices
    return group_names

  def genders(self):
    """Returns the list of genders.

    Returns: The gender choices declared on ``Client``.
    """
    gender_names = Client.gender_choices
    return gender_names

  def subworld_names(self):
    """Returns all registered subworld names.

    Returns: A list with the name of every subworld, converted to ``str``.
    """
    self.assert_validity()
    return [str(subworld.name) for subworld in self.subworlds()]

  def subworlds(self):
    """Returns the list of subworlds.

    Returns: A list with all ``Subworld`` objects found by the query.
    """
    all_subworlds = self.query(Subworld)
    return list(all_subworlds)
69

70
71
72
73
  def has_subworld(self, name):
    """Tells if a certain subworld is available.

    Keyword Parameters:

    name
      The subworld name to look for.

    Returns: True if at least one subworld carries that name, else False.
    """
    self.assert_validity()
    matches = self.query(Subworld).filter(Subworld.name == name)
    return matches.count() != 0
75

76
  def _replace_protocol_alias(self, protocol):
77
78
79
80
81
82
    if protocol == 'male':
      return 'mobile0-male'
    elif protocol == 'female':
      return 'mobile0-female'
    else:
      return protocol
83
84

  def _replace_protocols_alias(self, protocol):
85
    # print(protocol)
86
    if protocol:
87
      from six import string_types
88
      if isinstance(protocol, string_types):
89
        # print([self._replace_protocol_alias(protocol)])
90
91
        return [self._replace_protocol_alias(protocol)]
      else:
92
        # print(list(set(self._replace_protocol_alias(k) for k in protocols)))
93
        return list(set(self._replace_protocol_alias(k) for k in protocols))
94
95
    else:
      return None
96

97
  def clients(self, protocol=None, groups=None, subworld=None, gender=None):
    """Returns a list of Clients for the specific query by the user.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female') or several of them.
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively. The protocol only restricts the 'dev'
      and 'eval' clients, through the gender attached to each protocol.

    groups
      The groups to which the clients belong ('dev', 'eval', 'world').
      Please note that world data are protocol independent.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). Only applied to the clients of the 'world'
      group.

    gender
      The gender to consider ('male', 'female')

    Returns: A list containing all the clients which have the given
    properties: the 'world' clients first, then the 'dev'/'eval' ones, each
    part ordered by client id.
    """

    protocol = self._replace_protocols_alias(protocol)
    protocol = self.check_parameters_for_validity(protocol, "protocol", self.protocol_names(), [])
    groups = self.check_parameters_for_validity(groups, "group", self.groups(), self.groups())
    subworld = self.check_parameters_for_validity(subworld, "subworld", self.subworld_names(), [])
    gender = self.check_parameters_for_validity(gender, "gender", self.genders(), [])

    # List of the clients
    retval = []
    if 'world' in groups:
      q = self.query(Client).filter(Client.sgroup == 'world')
      if subworld:
        q = q.join((Subworld, Client.subworld)).filter(Subworld.name.in_(subworld))
      if gender:
        q = q.filter(Client.gender.in_(gender))
      q = q.order_by(Client.id)
      retval += list(q)

    dev_eval = [group for group in ('dev', 'eval') if group in groups]
    if dev_eval:
      protocol_gender = None
      if protocol:
        # Several protocols may have been requested, so collect the gender of
        # each of them instead of insisting on exactly one matching row.
        matching = self.query(Protocol).filter(Protocol.name.in_(protocol))
        protocol_gender = [p.gender for p in matching]
      q = self.query(Client).filter(Client.sgroup.in_(dev_eval))
      if protocol_gender:
        q = q.filter(Client.gender.in_(protocol_gender))
      if gender:
        q = q.filter(Client.gender.in_(gender))
      q = q.order_by(Client.id)
      retval += list(q)

    return retval
158

159
160
  def has_client_id(self, id):
    """Tells whether a client with the given identifier exists.

    Keyword Parameters:

    id
      The client identifier to look for.

    Returns: True if at least one client has this id, else False.
    """
    matches = self.query(Client).filter(Client.id == id)
    return matches.count() != 0
163
164
165
166

  def client(self, id):
    """Returns the Client object in the database given a certain id.

    Keyword Parameters:

    id
      The identifier of the client to retrieve.

    Returns: The single matching ``Client``; the underlying ``one()`` call
    raises an error unless exactly one client matches.
    """
    matches = self.query(Client).filter(Client.id == id)
    return matches.one()
169
170

  def tclients(self, protocol=None, groups=None, subworld='onethird', gender=None):
    """Returns the T-Norm clients for the specific query by the user.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female').
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    groups
      Ignored.
      For the MOBIO database, the T-Norm clients always come from the
      'world' set, and are hence the same for both the 'dev' and 'eval' sets.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). Please note that 'onethird' is the default
      value.

    gender
      The gender to consider ('male', 'female')

    Returns: A list containing all the T-norm clients, i.e. the result of
    ``clients()`` for the 'world' group.
    """
    tnorm_group = 'world'
    return self.clients(protocol, tnorm_group, subworld, gender)
196

197
  def zclients(self, protocol=None, groups=None, subworld='onethird', gender=None):
    """Returns the Z-Norm clients for the specific query by the user.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female').
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    groups
      Ignored.
      For the MOBIO database, the Z-Norm clients always come from the
      'world' set, and are hence the same for both the 'dev' and 'eval' sets.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). Please note that 'onethird' is the default
      value.

    gender
      The gender to consider ('male', 'female')

    Returns: A list containing all the Z-norm clients, i.e. the result of
    ``clients()`` for the 'world' group.
    """
    znorm_group = 'world'
    return self.clients(protocol, znorm_group, subworld, gender)
223
224
225
226
227
228
229

  def models(self, protocol=None, groups=None, subworld=None, gender=None):
    """Returns the models for the specific query by the user.

    For this database the models are the clients: the call is forwarded
    unchanged to ``clients()``.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female').
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    groups
      The groups to which the subjects attached to the models belong
      ('dev', 'eval', 'world').
      Please note that world data are protocol/gender independent.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). In order to be considered, 'world' should be
      in groups.

    gender
      The gender to consider ('male', 'female')

    Returns: A list containing all the models belonging to the given group.
    """
    matching_clients = self.clients(protocol, groups, subworld, gender)
    return matching_clients

251
252
253
254
255
256
  def model_ids(self, protocol=None, groups=None, subworld=None, gender=None):
    """Returns the model ids for the specific query by the user.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female').
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    groups
      The groups to which the subjects attached to the models belong
      ('dev', 'eval', 'world').
      Please note that world data are protocol/gender independent.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). In order to be considered, 'world' should be
      in groups.

    gender
      The gender to consider ('male', 'female')

    Returns: A list containing the ids of the clients returned by
    ``clients()`` for the same arguments.
    """
    matching_clients = self.clients(protocol, groups, subworld, gender)
    return [c.id for c in matching_clients]

278
  def tmodels(self, protocol=None, groups=None, subworld='onethird', gender=None):
    """Returns the T-Norm models for the specific query by the user.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female').
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    groups
      Ignored.
      For the MOBIO database, the T-Norm clients always come from the
      'world' set, and are hence the same for both the 'dev' and 'eval' sets.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). Please note that 'onethird' is the default
      value.

    gender
      The gender to consider ('male', 'female')

    Returns: A list containing all the matching T-norm models, ordered by
    ``TModel.id``.
    """
    protocol = self.check_parameters_for_validity(
        self._replace_protocols_alias(protocol), "protocol", self.protocol_names())
    subworld = self.check_parameters_for_validity(subworld, "subworld", self.subworld_names(), [])
    gender = self.check_parameters_for_validity(gender, "gender", self.genders(), [])

    # T-norm models of the requested protocols ...
    query = self.query(TModel).join(Client).join(Protocol)
    query = query.filter(Protocol.name.in_(protocol))
    # ... optionally restricted to a world split and/or a gender
    if subworld:
      query = query.join((Subworld, Client.subworld))
      query = query.filter(Subworld.name.in_(subworld))
    if gender:
      query = query.filter(Client.gender.in_(gender))
    return list(query.order_by(TModel.id))
316

317
318
  def tmodel_ids(self, protocol=None, groups=None, subworld='onethird', gender=None):
    """Returns the ids of the T-Norm models for the specific query by the user.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female').
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    groups
      Ignored.
      For the MOBIO database, the T-Norm clients always come from the
      'world' set, and are hence the same for both the 'dev' and 'eval' sets.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). Please note that 'onethird' is the default
      value.

    gender
      The gender to consider ('male', 'female')

    Returns: A list containing the ``mid`` of every T-norm model returned by
    ``tmodels()`` for the same arguments.
    """
    tnorm_models = self.tmodels(protocol, groups, subworld, gender)
    return [model.mid for model in tnorm_models]
342

Manuel Günther's avatar
Manuel Günther committed
343
  def get_client_id_from_model_id(self, model_id, **kwargs):
    """Returns the client_id attached to the given model_id.

    The model id is returned as is, i.e. model ids and client ids are the
    same values here.

    Keyword Parameters:

    model_id
      The model_id to consider

    kwargs
      Ignored.

    Returns: The client_id attached to the given model_id
    """
    client_id = model_id
    return client_id

355
  def objects(self, protocol=None, purposes=None, model_ids=None,
              groups=None, classes=None, subworld=None, gender=None, device=None):
    """Returns a list of Files for the specific query by the user.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female') or several of them.
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    purposes
      The purposes required to be retrieved, or a tuple with several of
      them. If 'None' is given (this is the default), it is considered the
      same as a tuple with all possible values. Files of the "world" group
      are only returned when 'train' is among the purposes; 'enroll' and
      'probe' select the files of the "dev" and "eval" groups.

    model_ids
      Only retrieves the files for the provided model ids (claimed client
      id): a single id or an iterable of ids. If 'None' is given (this is
      the default), no filter over the model_ids is performed. For impostor
      probes the filter is only applied when exactly one model id is given;
      the files of that client are then excluded.

    groups
      One of the groups ('dev', 'eval', 'world') or a tuple with several of
      them. If 'None' is given (this is the default), it is considered the
      same as a tuple with all possible values.

    classes
      The classes (types of accesses) to be retrieved ('client', 'impostor')
      or a tuple with several of them. If 'None' is given (this is the
      default), it is considered the same as a tuple with all possible
      values.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). In order to be considered, "world" should be
      in groups.

    gender
      The gender to consider ('male', 'female')

    device
      The device to consider ('laptop', 'mobile')

    Returns: A list of Files with the given properties, without duplicates
    and in no particular order.
    """

    protocol = self._replace_protocols_alias(protocol)
    protocol = self.check_parameters_for_validity(protocol, "protocol", self.protocol_names())
    purposes = self.check_parameters_for_validity(purposes, "purpose", self.purposes())
    groups = self.check_parameters_for_validity(groups, "group", self.groups())
    classes = self.check_parameters_for_validity(classes, "class", ('client', 'impostor'))
    subworld = self.check_parameters_for_validity(subworld, "subworld", self.subworld_names(), [])
    gender = self.check_parameters_for_validity(gender, "gender", self.genders(), [])
    device = self.check_parameters_for_validity(device, "device", File.device_choices, [])

    # ``collections.Iterable`` was removed in Python 3.10; the abstract base
    # classes live in ``collections.abc``
    try:
      from collections.abc import Iterable
    except ImportError:  # Python 2
      from collections import Iterable

    if model_ids is None:
      model_ids = ()
    elif isinstance(model_ids, six.string_types) or not isinstance(model_ids, Iterable):
      # a single id; a string is iterable but is still one id
      model_ids = (model_ids,)
    else:
      # materialise once: the ids are used by several queries and by len()
      model_ids = tuple(model_ids)

    file_order = (File.client_id, File.session_id, File.speech_type, File.shot_id, File.device)

    def restrict(q):
      """Applies the optional gender and device restrictions to a query"""
      if gender:
        q = q.filter(Client.gender.in_(gender))
      if device:
        q = q.filter(File.device.in_(device))
      return q

    def dev_eval_files(purpose):
      """Builds the query for the files of the given purpose in ``groups``"""
      q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
          filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_(groups), ProtocolPurpose.purpose == purpose))
      return restrict(q)

    # Now query the database
    retval = []
    if 'world' in groups and 'train' in purposes:
      q = self.query(File).join(Client).filter(Client.sgroup == 'world').join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
          filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup == 'world'))
      if subworld:
        q = q.join((Subworld, File.subworld)).filter(Subworld.name.in_(subworld))
      q = restrict(q)
      if model_ids:
        q = q.filter(File.client_id.in_(model_ids))
      retval += list(q.order_by(*file_order))

    if 'dev' in groups or 'eval' in groups:
      if 'enroll' in purposes:
        q = dev_eval_files('enroll')
        if model_ids:
          q = q.filter(Client.id.in_(model_ids))
        retval += list(q.order_by(*file_order))

      if 'probe' in purposes:
        if 'client' in classes:
          q = dev_eval_files('probe')
          if model_ids:
            q = q.filter(Client.id.in_(model_ids))
          retval += list(q.order_by(*file_order))

        if 'impostor' in classes:
          q = dev_eval_files('probe')
          # with a single claimed identity, every other client is an impostor
          if len(model_ids) == 1:
            q = q.filter(not_(File.client_id.in_(model_ids)))
          retval += list(q.order_by(*file_order))

    return list(set(retval))  # To remove duplicates
471

472
  def tobjects(self, protocol=None, model_ids=None, groups=None, subworld='onethird', gender=None, speech_type=None, device=None):
    """Returns a list of Files for enrolling T-norm models for score
       normalization.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female') or several of them.
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    model_ids
      Only retrieves the files for the provided list of model ids; a single
      string is taken as one model id. If 'None' is given (this is the
      default), no filter over the model_ids is performed.

    groups
      Ignored.
      For the MOBIO database, the T-Norm clients always come from the
      'world' set, and are hence the same for both the 'dev' and 'eval' sets.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). Please note that 'onethird' is the default
      value.

    gender
      The gender to consider ('male', 'female')

    speech_type
      The speech type to consider ('p', 'l', 'r', 'f'), or several of them.
      If 'None' is given (this is the default), no filter over the speech
      type is performed.

    device
      The device choice to consider ('mobile', 'laptop'), or several of
      them. If 'None' is given (this is the default), no filter over the
      device is performed.

    Returns: A list of Files with the given properties.
    """

    protocol = self._replace_protocols_alias(protocol)
    protocol = self.check_parameters_for_validity(protocol, "protocol", self.protocol_names())
    subworld = self.check_parameters_for_validity(subworld, "subworld", self.subworld_names(), [])
    gender = self.check_parameters_for_validity(gender, "gender", self.genders(), [])
    # Validate these two like the other query methods do; this also turns a
    # single value such as 'mobile' into a list before it reaches ``in_``.
    # The empty default keeps "None means no restriction".
    speech_type = self.check_parameters_for_validity(speech_type, "speech_type", File.speech_type_choices, [])
    device = self.check_parameters_for_validity(device, "device", File.device_choices, [])

    if model_ids is None:
      model_ids = ()
    elif isinstance(model_ids, six.string_types):
      model_ids = (model_ids,)

    # Now query the database
    q = self.query(File, Protocol).filter(Protocol.name.in_(protocol)).join(Client)
    if subworld:
      q = q.join((Subworld, File.subworld)).filter(Subworld.name.in_(subworld))
    q = q.join((TModel, File.tmodels)).filter(TModel.protocol_id == Protocol.id)
    if model_ids:
      q = q.filter(TModel.mid.in_(model_ids))
    if gender:
      q = q.filter(Client.gender.in_(gender))
    if speech_type:
      q = q.filter(File.speech_type.in_(speech_type))
    if device:
      q = q.filter(File.device.in_(device))
    q = q.order_by(File.client_id, File.session_id, File.speech_type, File.shot_id, File.device)
    # each row is a (File, Protocol) pair; only the File is of interest
    return [row[0] for row in q]
535

536
  def zobjects(self, protocol=None, model_ids=None, groups=None, subworld='onethird', gender=None, speech_type=['r', 'f'], device=['mobile']):
    """Returns a list of Files to perform Z-norm score normalization.

    Keyword Parameters:

    protocol
      One of the MOBIO protocols ('mobile0-male', 'mobile0-female',
      'mobile1-male', 'mobile1-female', 'laptop1-male', 'laptop1-female',
      'laptop_mobile1-male', 'laptop_mobile1-female') or several of them.
      'male' and 'female' are aliases for 'mobile0-male' and
      'mobile0-female', respectively.

    model_ids
      Only retrieves the files for the provided model ids (claimed client
      id): a single id or an iterable of ids. If 'None' is given (this is
      the default), no filter over the model_ids is performed.

    groups
      Only checked for validity, otherwise ignored.
      For the MOBIO database, the Z-Norm clients always come from the
      'world' set, and are hence the same for both the 'dev' and 'eval' sets.

    subworld
      Specify a split of the world data ('onethird', 'twothirds',
      'twothirds-subsampled'). Please note that 'onethird' is the default
      value.

    gender
      The gender to consider ('male', 'female')

    speech_type
      The speech type to consider ('p', 'l', 'r', 'f'); the default is
      ['r', 'f'].

    device
      The device choice to consider ('mobile', 'laptop'); the default is
      ['mobile'].

    Returns: A list of Files with the given properties.
    """

    protocol = self._replace_protocols_alias(protocol)
    protocol = self.check_parameters_for_validity(protocol, "protocol", self.protocol_names())
    # the result is not needed: this call is kept only to reject bad values
    self.check_parameters_for_validity(groups, "group", self.groups())
    subworld = self.check_parameters_for_validity(subworld, "subworld", self.subworld_names(), [])
    gender = self.check_parameters_for_validity(gender, "gender", self.genders(), [])
    speech_type = self.check_parameters_for_validity(speech_type, "speech_type", File.speech_type_choices)
    device = self.check_parameters_for_validity(device, "device", File.device_choices)

    # ``collections.Iterable`` was removed in Python 3.10; the abstract base
    # classes live in ``collections.abc``
    try:
      from collections.abc import Iterable
    except ImportError:  # Python 2
      from collections import Iterable

    if model_ids is None:
      model_ids = ()
    elif isinstance(model_ids, six.string_types) or not isinstance(model_ids, Iterable):
      # a single id; a string is iterable but is still one id
      model_ids = (model_ids,)
    else:
      # materialise once so that one-shot iterables can be tested and used
      model_ids = tuple(model_ids)

    # Now query the database
    q = self.query(File).join(Client).filter(Client.sgroup == 'world').join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
        filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup == 'world'))
    if subworld:
      q = q.join((Subworld, File.subworld)).filter(Subworld.name.in_(subworld))
    if gender:
      q = q.filter(Client.gender.in_(gender))
    if speech_type:
      q = q.filter(File.speech_type.in_(speech_type))
    if device:
      q = q.filter(File.device.in_(device))
    if model_ids:
      q = q.filter(File.client_id.in_(model_ids))
    q = q.order_by(File.client_id, File.session_id, File.speech_type, File.shot_id, File.device)
    return list(q)
601

602
  def annotations(self, file):
    """Reads the annotations of the given file and returns them.

    If you don't have a copy of the annotation files, you can download them
    under http://www.idiap.ch/resource/biometric.

    Keyword parameters:

    file
      The ``File`` object for which the annotations should be read.

    Return value
      None when no annotation directory was configured. Otherwise the
      annotations as a dictionary:
      {'reye':(re_y,re_x), 'leye':(le_y,le_x)}
    """
    if self.annotation_directory is not None:
      self.assert_validity()
      # path of the annotation file belonging to this File object
      path = file.make_path(self.annotation_directory, self.annotation_extension)
      # return the annotations as read from file
      return bob.db.base.read_annotation_file(path, 'eyecenter')

    return None
623

624
625
  def protocol_names(self):
    """Returns all registered protocol names.

    Returns: A list with the name of every protocol, converted to ``str``.
    """
    return [str(protocol.name) for protocol in self.protocols()]

631
632
  def protocols(self):
    """Returns all registered protocols.

    Returns: A list with all ``Protocol`` objects found by the query.
    """
    all_protocols = self.query(Protocol)
    return list(all_protocols)
635

636
637
  def has_protocol(self, name):
    """Tells if a certain protocol is available.

    Keyword Parameters:

    name
      The protocol name to look for; the aliases 'male' and 'female' are
      resolved first.

    Returns: True if at least one protocol carries that name, else False.
    """
    full_name = self._replace_protocol_alias(name)
    matches = self.query(Protocol).filter(Protocol.name == full_name)
    return matches.count() != 0
640

641
642
643
  def protocol(self, name):
    """Returns the protocol object in the database given a certain name.

    Keyword Parameters:

    name
      The protocol name; the aliases 'male' and 'female' are resolved
      first.

    Returns: The single matching ``Protocol``; the underlying ``one()``
    call raises an error unless exactly one protocol matches.
    """
    full_name = self._replace_protocol_alias(name)
    matches = self.query(Protocol).filter(Protocol.name == full_name)
    return matches.one()
646

647
648
  def protocol_purposes(self):
    """Returns all registered protocol purposes.

    Returns: A list with all ``ProtocolPurpose`` objects found by the query.
    """
    all_purposes = self.query(ProtocolPurpose)
    return list(all_purposes)
651

652
653
  def purposes(self):
    """Returns the list of allowed purposes.

    Returns: The purpose choices declared on ``ProtocolPurpose``.
    """
    allowed_purposes = ProtocolPurpose.purpose_choices
    return allowed_purposes