# Database.py
class Database:
  """This class represents the basic API for database access.
  Please use this class as a base class for your database access classes.
  Do not forget to call the constructor of this base class in your derived class.

  **Parameters:**

  name : str
    A unique name for the database.

  original_directory : str
    The directory where the original data of the database are stored.

  original_extension : str
    The file name extension of the original data.

  annotation_directory : str
    The directory where the image annotations of the database are stored, if any.

  annotation_extension : str
    The file name extension of the annotation files.

  annotation_type : str
    The type of the annotation file to read, see :py:func:`bob.db.verification.utils.read_annotation_file` for accepted formats.

  protocol : str or ``None``
    The name of the protocol that defines the default experimental setup for this database.

    .. todo:: Check if the ``None`` protocol is supported.

  training_depends_on_protocol : bool
    Specifies, if the training set used for training the extractor and the projector depend on the protocol.
    This flag is used to avoid re-computation of data when running on the different protocols of the same database.

  models_depend_on_protocol : bool
    Specifies, if the models depend on the protocol.
    This flag is used to avoid re-computation of models when running on the different protocols of the same database.

  kwargs
    Ignored extra arguments.
  """

  def __init__(
     self,
     name,
     original_directory = None,
     original_extension = None,
     annotation_directory = None,
     annotation_extension = '.pos',
     annotation_type = None,
     protocol = 'Default',
     training_depends_on_protocol = False,
     models_depend_on_protocol = False,
     **kwargs
  ):
    # the name is the only mandatory parameter; it has to be a string
    assert isinstance(name, str)

    self.name = name
    self.original_directory = original_directory
    self.original_extension = original_extension
    self.annotation_directory = annotation_directory
    self.annotation_extension = annotation_extension
    self.annotation_type = annotation_type
    self.protocol = protocol
    self.training_depends_on_protocol = training_depends_on_protocol
    self.models_depend_on_protocol = models_depend_on_protocol


  def __str__(self):
    """__str__() -> info

    This function returns all parameters of this class.
    The annotation-related parameters are only listed when an ``annotation_type`` is set,
    and the ``annotation_directory`` is only listed when it is non-empty.

    **Returns:**

    info : str
      A string containing the full information of all parameters of this class.
    """
    params = "name=%s, protocol=%s, original_directory=%s, original_extension=%s" % (self.name, self.protocol, self.original_directory, self.original_extension)
    if self.annotation_type is not None:
      # use the instance attribute; a bare ``annotation_type`` is not defined in this scope
      params += ", annotation_type=%s" % self.annotation_type
      if self.annotation_directory:
        params += ", annotation_directory=%s" % self.annotation_directory
      params += ", annotation_extension=%s" % self.annotation_extension
    params += ", training_depends_on_protocol=%s, models_depend_on_protocol=%s" % (self.training_depends_on_protocol, self.models_depend_on_protocol)
    return "%s(%s)" % (str(self.__class__), params)


  ###########################################################################
  ### Helper functions that you might want to use in derived classes
  ###########################################################################
  def sort(self, files):
    """sort(files) -> sorted

    Returns a sorted version of the given list of File's (or other structures that define an 'id' data member).
    The files are sorted using their own comparison operators, and entries whose ``id`` equals the ``id`` of their predecessor in the sorted list are removed.

    **Parameters:**

    files : [:py:class:`File`]
      The list of files to be uniquified and sorted.

    **Returns:**

    sorted : [:py:class:`File`]
      The sorted list of files, with entries having a duplicate :py:attr:`File.id` being removed.
    """
    # sort files using their sort function
    sorted_files = sorted(files)
    # remove duplicates: keep the first element, and every element whose id differs from its predecessor's
    return [f for i, f in enumerate(sorted_files) if not i or sorted_files[i-1].id != f.id]


  def arrange_by_client(self, files):
    """arrange_by_client(files) -> files_by_client

    Arranges the given list of files by client id.
    This function returns a list of lists of File's.

    **Parameters:**

    files : [:py:class:`File`]
      A list of files that should be split up by :py:attr:`File.client_id`.

    **Returns:**

    files_by_client : [[:py:class:`File`]]
      The list of lists of files, where each sub-list groups the files with the same :py:attr:`File.client_id`.
      The sub-lists are ordered by sorted client id; inside each sub-list, the files keep their input order.
    """
    # group the files by their client id, preserving the input order inside each group
    client_files = {}
    for f in files:
      client_files.setdefault(f.client_id, []).append(f)

    # emit the groups in the order of the sorted client ids
    return [client_files[client] for client in sorted(client_files)]


  def annotations(self, file):
    """annotations(file) -> annots

    Returns the annotations for the given File object, if available.
    It uses :py:func:`bob.db.verification.utils.read_annotation_file` to load the annotations.
    The annotation file name is composed of the ``annotation_directory``, the ``path`` of the file and the ``annotation_extension``.

    **Parameters:**

    file : :py:class:`File`
      The file for which annotations should be returned.

    **Returns:**

    annots : dict or None
      The annotations for the file, if available.
      ``None`` is returned when no ``annotation_directory`` is set, or when ``bob.db.verification.utils`` cannot be imported (in which case an error is reported).
    """
    # this module has no module-level imports, so bring ``os`` into scope locally
    import os

    if self.annotation_directory:
      try:
        import bob.db.verification.utils
        annotation_path = os.path.join(self.annotation_directory, file.path + self.annotation_extension)
        return bob.db.verification.utils.read_annotation_file(annotation_path, self.annotation_type)
      except ImportError as e:
        # best effort: report the problem and fall through to returning None
        from .. import utils
        utils.error("Cannot import bob.db.verification.utils: '%s'. No annotation is read." % e)

    return None


  def uses_probe_file_sets(self):
    """Defines if, for the current protocol, the database uses several probe files to generate a score.
    By default, ``False`` is returned. Overwrite the default if you need different behavior."""
    return False


  def file_names(self, files, directory, extension):
    """file_names(files, directory, extension) -> paths

    Returns the full path of the given File objects.

    **Parameters:**

    files : [:py:class:`File`]
      The list of file object to retrieve the file names for.

    directory : str
      The base directory, where the files can be found.

    extension : str
      The file name extension to add to all files.

    **Returns:**

    paths : [str] or [[str]]
      The paths extracted for the files, in the same order.
      If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set.
    """
    # return the paths of the files
    if self.uses_probe_file_sets() and files and hasattr(files[0], 'files'):
      # List of Filesets: do not remove duplicates
      return [[f.make_path(directory, extension) for f in file_set.files] for file_set in files]
    else:
      # List of files, do not remove duplicates
      return [f.make_path(directory, extension) for f in files]

  def original_file_names(self, files):
    """original_file_names(files) -> paths

    Returns the full path of the original data of the given File objects.
    Both ``original_directory`` and ``original_extension`` need to be set for this function to work.

    **Parameters:**

    files : [:py:class:`File`]
      The list of file object to retrieve the original data file names for.

    **Returns:**

    paths : [str] or [[str]]
      The paths extracted for the files, in the same order.
      If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set.
    """
    assert self.original_directory is not None
    assert self.original_extension is not None
    return self.file_names(files, self.original_directory, self.original_extension)


  ###########################################################################
  ### Interface functions that you need to implement in your class.
  ###########################################################################

  def all_files(self, groups = None):
    """all_files(groups=None) -> files

    Returns all files of the database.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    groups : some of ``('world', 'dev', 'eval')`` or ``None``
      The groups to get the data for.
      If ``None``, data for all groups is returned.

    **Returns:**

    files : [:py:class:`File`]
      The sorted and unique list of all files of the database.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def training_files(self, step = None, arrange_by_client = False):
    """training_files(step = None, arrange_by_client = False) -> files

    Returns all training File objects for the given step, and arranges them by client, if desired.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    step : one of ``('train_extractor', 'train_projector', 'train_enroller')`` or ``None``
      The step for which the training data should be returned.
      Might be ignored in derived class implementations.

    arrange_by_client : bool
      Should the training files be arranged by client?

      .. note::
         You can use :py:meth:`arrange_by_client` in derived class implementations to arrange the files.

    **Returns:**

    files : [:py:class:`File`] or [[:py:class:`File`]]
      The (arranged) list of files used for the training of the given step.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def model_ids(self, group = 'dev'):
    """model_ids(group = 'dev') -> ids

    Returns a list of model ids for the given group.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    group : one of ``('dev', 'eval')``
      The group to get the model ids for.

    **Returns:**

    ids : [int] or [str]
      The list of (unique) model ids for the given group.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def client_id_from_model_id(self, model_id, group = 'dev'):
    """client_id_from_model_id(model_id, group = 'dev') -> client_id

    In some databases, each client can contain several models.
    Hence, client and model ids differ.
    This function converts the given model id into its corresponding client id.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    model_id : int or str
      A unique ID that identifies the model for the client.

    group : one of ``('dev', 'eval')``
      The group to get the client ids for.

    **Returns:**

    client_id : int or str
      A unique ID that identifies the client, to which the model belongs.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def enroll_files(self, model_id, group = 'dev'):
    """enroll_files(model_id, group = 'dev') -> files

    Returns a list of File objects that should be used to enroll the model with the given model id from the given group.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    model_id : int or str
      A unique ID that identifies the model.

    group : one of ``('dev', 'eval')``
      The group to get the enrollment files for.

    **Returns:**

    files : [:py:class:`File`]
      The list of files used to enroll the model with the given model id.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def probe_files(self, model_id = None, group = 'dev'):
    """probe_files(model_id = None, group = 'dev') -> files

    Returns a list of probe File objects.
    If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group).
    Otherwise, all probe files of the given group are returned.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    model_id : int or str or ``None``
      A unique ID that identifies the model.

    group : one of ``('dev', 'eval')``
      The group to get the probe files for.

    **Returns:**

    files : [:py:class:`File`]
      The list of files used to probe the model with the given model id.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def probe_file_sets(self, model_id = None, group = 'dev'):
    """probe_file_sets(model_id = None, group = 'dev') -> files

    Returns a list of probe FileSet objects.
    If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group).
    Otherwise, all probe files of the given group are returned.
    This function needs to be implemented in derived class implementations, if the :py:meth:`uses_probe_file_sets` returns ``True``.

    **Parameters:**

    model_id : int or str or ``None``
      A unique ID that identifies the model.

    group : one of ``('dev', 'eval')``
      The group to get the probe file sets for.

    **Returns:**

    files : [:py:class:`FileSet`]
      The list of file sets used to probe the model with the given model id.
    """
    raise NotImplementedError("Please implement this function in derived classes")



class DatabaseZT (Database):
  """This class defines additional API functions that are required to compute ZT score normalization.
  This class does not define a constructor.
  During construction of a derived class, please call the constructor of the base class :py:class:`Database` directly."""

  def t_model_ids(self, group = 'dev'):
    """t_model_ids(group = 'dev') -> ids

    Returns a list of model ids of T-Norm models for the given group.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    group : one of ``('dev', 'eval')``
      The group to get the model ids for.

    **Returns:**

    ids : [int] or [str]
      The list of (unique) model ids for T-Norm models of the given group.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def client_id_from_t_model_id(self, t_model_id, group = 'dev'):
    """client_id_from_t_model_id(t_model_id, group = 'dev') -> client_id

    Returns the client id for the given T-Norm model id.
    In this base class implementation, we just use the :py:meth:`client_id_from_model_id` function.
    Overload this function if you need another behavior.

    **Parameters:**

    t_model_id : int or str
      A unique ID that identifies the T-Norm model.

    group : one of ``('dev', 'eval')``
      The group to get the client ids for.

    **Returns:**

    client_id : int or str
      A unique ID that identifies the client, to which the T-Norm model belongs.
    """
    # T-Norm model ids are resolved exactly like regular model ids by default
    return self.client_id_from_model_id(t_model_id, group)


  def t_enroll_files(self, t_model_id, group = 'dev'):
    """t_enroll_files(t_model_id, group = 'dev') -> files

    Returns a list of File objects that should be used to enroll the T-Norm model with the given model id from the given group.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    t_model_id : int or str
      A unique ID that identifies the model.

    group : one of ``('dev', 'eval')``
      The group to get the enrollment files for.

    **Returns:**

    files : [:py:class:`File`]
      The list of files used to enroll the model with the given model id.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def z_probe_files(self, group = 'dev'):
    """z_probe_files(group = 'dev') -> files

    Returns a list of probe File objects used to compute the Z-Norm.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    group : one of ``('dev', 'eval')``
      The group to get the Z-norm probe files for.

    **Returns:**

    files : [:py:class:`File`]
      The unique list of files used to compute the Z-norm.
    """
    raise NotImplementedError("Please implement this function in derived classes")


  def z_probe_file_sets(self, group = 'dev'):
    """z_probe_file_sets(group = 'dev') -> files

    Returns a list of probe FileSet objects used to compute the Z-Norm.
    This function needs to be implemented in derived class implementations.

    **Parameters:**

    group : one of ``('dev', 'eval')``
      The group to get the Z-norm probe files for.

    **Returns:**

    files : [:py:class:`FileSet`]
      The unique list of file sets used to compute the Z-norm.
    """
    raise NotImplementedError("Please implement this function in derived classes")