#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Guenther <Manuel.Guenther@idiap.ch>
from __future__ import print_function

import sys
import argparse

import logging
logger = logging.getLogger("bob.bio.gmm")

import bob.bio.base
from .. import tools, algorithm
from bob.bio.base import tools as base_tools


def parse_arguments(command_line_parameters, exclude_resources_from = []):
  """This function parses the given options (which by default are the command line options). If exclude_resources_from is specified (as a list), the resources from the given packages are not listed in the help message."""
  # set up command line parser
  parsers = base_tools.command_line_parser(exclude_resources_from = exclude_resources_from)

  # add GMM-related options
  tools.add_parallel_gmm_options(parsers, sub_module = 'ivector')

  # override some parameters
  parsers['config'].add_argument('-a', '--algorithm', metavar = 'x', nargs = '+', default = ['ivector-cosine'],
      help = 'Biometric recognition; only IVector algorithms are allowed')


  # Add sub-tasks that can be executed by this script
  parser = parsers['main']
  parser.add_argument('--sub-task',
      choices = ('preprocess', 'train-extractor', 'extract', 'normalize-features', 'kmeans-init', 'kmeans-e-step', 'kmeans-m-step', 'gmm-init', 'gmm-e-step', 'gmm-m-step', 'gmm-project', 'ivector-e-step', 'ivector-m-step', 'ivector-training', 'ivector-projection', 'train-whitener', 'whitening-projection', 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection',  'project', 'train-plda',  'save-projector', 'enroll', 'compute-scores', 'concatenate'),
      help = argparse.SUPPRESS) #'Executes a subtask (FOR INTERNAL USE ONLY!!!)'
  parser.add_argument('--iteration', type = int,
      help = argparse.SUPPRESS) #'The current iteration of KMeans, GMM or IVector training'
  parser.add_argument('--model-type', choices = ['N', 'T'],
      help = argparse.SUPPRESS) #'Which type of models to generate (Normal or TModels)'
  parser.add_argument('--score-type', choices = ['A', 'B', 'C', 'D', 'Z'],
      help = argparse.SUPPRESS) #'The type of scores that should be computed'
  parser.add_argument('--group',
      help = argparse.SUPPRESS) #'The group for which the current action should be performed'
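
  # NOTE: the hidden options above are internal only; they are set when this script
  # re-submits itself to the grid with a specific --sub-task, and they are hidden from
  # the --help output via argparse.SUPPRESS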

  # now that we have set up everything, get the command line arguments
  args = base_tools.initialize(parsers, command_line_parameters,
      skips = ['preprocessing', 'extractor-training', 'extraction', 'normalization', 'kmeans', 'gmm', 'ivector-training', 'ivector-projection', 'train-whitener', 'whitening-projection', 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection',  'projection', 'train-plda', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']
  )

  if args.grid is None and args.parallel is None:
    raise ValueError("To be able to run the parallelized IVector script, either the --grid or the --parallel option needs to be specified!")

  # the projector is trained via the parallelized sub-tasks of this script, so the generic projector training step is always skipped
  args.skip_projector_training = True

  # and add the GMM-related parameters
  tools.initialize_parallel_gmm(args, sub_module = 'ivector')

  # assert that the algorithm is an IVector algorithm
  if tools.base(args.algorithm).__class__ != algorithm.IVector:
    raise ValueError("The given algorithm %s is not a (pure) IVector algorithm" % args.algorithm)

  # check if one of the parameters is given without the sub-task
  if args.sub_task is None:
    if args.iteration is not None: raise ValueError("The option --iteration is an internal option and cannot be used to define experiments")
    if args.model_type is not None: raise ValueError("The option --model-type is an internal option and cannot be used to define experiments")
    if args.score_type is not None: raise ValueError("The option --score-type is an internal option and cannot be used to define experiments")
    if args.group is not None: raise ValueError("The option --group is an internal option and cannot be used to define experiments; did you mean to use --groups?")

  return args
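
# A minimal usage sketch of parse_arguments (the resource names below are placeholders;
# use database/preprocessor/extractor resources registered in your installation):
#
#   args = parse_arguments([
#       '--database', 'my-database', '--preprocessor', 'my-preprocessor',
#       '--extractor', 'my-extractor', '--algorithm', 'ivector-cosine',
#       '--sub-directory', 'ivector-test', '--parallel', '4',
#   ])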

from .verify_gmm import add_gmm_jobs

def add_ivector_jobs(args, job_ids, deps, submitter):
  """Adds all GMM-related jobs."""

  # first, add gmm jobs
  job_ids, deps = add_gmm_jobs(args, job_ids, deps, submitter)

  # now, add the extra steps for ivector
  algorithm = tools.base(args.algorithm)

  if not args.skip_ivector_training:
    # gmm projection
    job_ids['gmm-projection'] = submitter.submit(
            '--sub-task gmm-project',
            name = 'pro-gmm',
            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
            dependencies = deps,
            **args.grid.projection_queue)
    deps.append(job_ids['gmm-projection'])

    # several iterations of E and M steps
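    # (the first E-step depends on the GMM projection jobs; every later E-step depends on
    # the M-step of the previous iteration, as encoded in the dependencies below)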
    for iteration in range(args.tv_start_iteration, algorithm.tv_training_iterations):
      # E-step
      job_ids['ivector-e-step'] = submitter.submit(
              '--sub-task ivector-e-step --iteration %d' % iteration,
              name='i-e-%d' % iteration,
              number_of_parallel_jobs = args.grid.number_of_projection_jobs,
              dependencies = [job_ids['ivector-m-step']] if iteration != args.tv_start_iteration else deps,
              allow_missing_files = args.allow_missing_files,
              **args.grid.projection_queue)

      # M-step
      job_ids['ivector-m-step'] = submitter.submit(
              '--sub-task ivector-m-step --iteration %d' % iteration,
              name='i-m-%d' % iteration,
              dependencies = [job_ids['ivector-e-step']],
              **args.grid.training_queue)
    deps.append(job_ids['ivector-m-step'])


  # ivector projection
  if not args.skip_ivector_projection:
    job_ids['ivector-projection'] = submitter.submit(
            '--sub-task ivector-projection',
            name = 'pro-ivector',
            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
            dependencies = deps,
            allow_missing_files = args.allow_missing_files,
            **args.grid.projection_queue)
    deps.append(job_ids['ivector-projection'])

  # train whitener
  if not args.skip_train_whitener:
    job_ids['whitener-training'] = submitter.submit(
            '--sub-task train-whitener',
            name = 'train-whitener',
            dependencies = deps,
            allow_missing_files = args.allow_missing_files,
            **args.grid.training_queue)
    deps.append(job_ids['whitener-training'])

  # whitening projection
  if not args.skip_whitening_projection:
    job_ids['whitening-projection'] = submitter.submit(
            '--sub-task whitening-projection',
            name = 'whitened',
            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
            dependencies = deps,
            allow_missing_files = args.allow_missing_files,
            **args.grid.projection_queue)
    deps.append(job_ids['whitening-projection'])

  # train LDA
  if not args.skip_train_lda:
    job_ids['lda-training'] = submitter.submit(
            '--sub-task train-lda',
            name = 'train-lda',
            dependencies = deps,
            allow_missing_files = args.allow_missing_files,
            **args.grid.training_queue)
    deps.append(job_ids['lda-training'])

  # LDA projection
  if not args.skip_lda_projection:
    job_ids['lda-projection'] = submitter.submit(
            '--sub-task lda-projection',
            name = 'lda_projection',
            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
            dependencies = deps,
            allow_missing_files = args.allow_missing_files,
            **args.grid.projection_queue)
    deps.append(job_ids['lda-projection'])

  # train WCCN
  if not args.skip_train_wccn:
    job_ids['wccn-training'] = submitter.submit(
            '--sub-task train-wccn',
            name = 'train-wccn',
            dependencies = deps,
            allow_missing_files = args.allow_missing_files,
            **args.grid.training_queue)
    deps.append(job_ids['wccn-training'])

  # WCCN projection
  if not args.skip_wccn_projection:
    job_ids['wccn-projection'] = submitter.submit(
            '--sub-task wccn-projection',
            name = 'wccn_projection',
            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
            dependencies = deps,
            allow_missing_files = args.allow_missing_files,
            **args.grid.projection_queue)
    deps.append(job_ids['wccn-projection'])

  # train PLDA
  if not args.skip_train_plda:
    job_ids['plda-training'] = submitter.submit(
            '--sub-task train-plda',
            name = 'train-plda',
            allow_missing_files = args.allow_missing_files,
            dependencies = deps,
            **args.grid.training_queue)
    deps.append(job_ids['plda-training'])

  # save the projector
  job_ids['save-projector'] = submitter.submit(
          '--sub-task save-projector',
          name = 'save-projector',
          dependencies = deps,
          **args.grid.training_queue)
  deps.append(job_ids['save-projector'])

  return job_ids, deps
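
# For reference, the extra i-vector jobs submitted above form the following chain, where
# each step depends on (a subset of) the previous ones:
#   gmm-project -> ivector-e-step / ivector-m-step (iterated) -> ivector-projection
#   -> train-whitener -> whitening-projection -> train-lda -> lda-projection
#   -> train-wccn -> wccn-projection -> train-plda -> save-projector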


from .verify_gmm import execute as gmm_execute


def execute(args):
  """Run the desired job of the tool chain that is specified on command line.
  This job might be executed either in the grid, or locally."""

  # first, let the base script decide if it knows how to execute the job
  if gmm_execute(args):
    return True

  # now, check what we can do
  algorithm = tools.base(args.algorithm)

  # the file selector object
  fs = tools.FileSelector.instance()
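
  # each branch below implements exactly one --sub-task; parallelized sub-tasks split their
  # work according to the indices computed from the grid configuration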

  if args.sub_task == 'gmm-project':
    tools.gmm_project(
        algorithm,
        args.extractor,
        indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
        allow_missing_files = args.allow_missing_files,
        force = args.force)

  elif args.sub_task == 'ivector-e-step':
    tools.ivector_estep(
        algorithm,
        args.iteration,
        allow_missing_files = args.allow_missing_files,
        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
        force = args.force)

  # i-vector M-step (update the TV matrix of the projector)
  elif args.sub_task == 'ivector-m-step':
    tools.ivector_mstep(
        algorithm,
        args.iteration,
        number_of_parallel_jobs = args.grid.number_of_projection_jobs,
        clean = args.clean_intermediate,
        force = args.force)

  elif args.sub_task == 'ivector-projection':
    tools.ivector_project(
        algorithm,
        allow_missing_files = args.allow_missing_files,
        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
        force = args.force)

  elif args.sub_task == 'train-whitener':
    tools.train_whitener(
        algorithm,
        allow_missing_files = args.allow_missing_files,
        force = args.force)

  elif args.sub_task == 'whitening-projection':
    tools.whitening_project(
        algorithm,
        allow_missing_files = args.allow_missing_files,
        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
        force = args.force)

  elif args.sub_task == 'train-lda':
    if algorithm.use_lda:
      tools.train_lda(
          algorithm,
          allow_missing_files = args.allow_missing_files,
          force = args.force)

  elif args.sub_task == 'lda-projection':
    if algorithm.use_lda:
      tools.lda_project(
          algorithm,
          allow_missing_files = args.allow_missing_files,
          indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
          force = args.force)

  elif args.sub_task == 'train-wccn':
    if algorithm.use_wccn:
      tools.train_wccn(
          algorithm,
          allow_missing_files = args.allow_missing_files,
          force = args.force)

  elif args.sub_task == 'wccn-projection':
    if algorithm.use_wccn:
      tools.wccn_project(
          algorithm,
          allow_missing_files = args.allow_missing_files,
          indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
          force = args.force)

  elif args.sub_task == 'train-plda':
    if algorithm.use_plda:
      tools.train_plda(
          algorithm,
          allow_missing_files = args.allow_missing_files,
          force = args.force)

  elif args.sub_task == 'save-projector':
    tools.save_projector(
        algorithm,
        force=args.force)
    # Not our keyword...
  else:
    return False
  return True


def verify(args, command_line_parameters, external_fake_job_id = 0):
  """This is the main entry point for computing verification experiments.
  You just have to specify configurations for any of the steps of the toolchain, which are:
  -- the database
  -- the preprocessing
  -- feature extraction
  -- the recognition algorithm
  -- and the grid configuration.
  Additionally, you can skip parts of the toolchain by selecting proper --skip-... parameters.
  If your probe files are not too big, you can also specify the --preload-probes switch to speed up the score computation.
  If files should be re-generated, please specify the --force option (might be combined with the --skip-... options)."""


  # as the main entry point, check whether the sub-task is specified
  if args.sub_task is not None:
    # execute the desired sub-task
    if not execute(args):
      raise ValueError("The specified --sub-task '%s' is not known to the system" % args.sub_task)
    return {}
  else:
    # add jobs
    submitter = base_tools.GridSubmission(args, command_line_parameters, executable = 'verify_ivector.py', first_fake_job_id = 0)
    retval = tools.add_jobs(args, submitter, local_job_adder = add_ivector_jobs)
    base_tools.write_info(args, command_line_parameters, submitter.executable)

    if args.grid.is_local() and args.run_local_scheduler:
      if args.dry_run:
        print ("Would have started the local scheduler to run the experiments with parallel jobs")
      else:
        # start the jman local daemon
        submitter.execute_local()
      return {}

    else:
      # return job ids as a dictionary
      return retval
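

# Example command line for this script (a sketch; the database, preprocessor, extractor and
# grid configuration names are placeholders for resources available in your installation):
#
#   verify_ivector.py -d <database> -p <preprocessor> -e <extractor> \
#       -a ivector-cosine -s ivector-experiment --grid <grid-configuration> -vv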


def main(command_line_parameters = None):
  """Executes the main function"""
  try:
    # do the command line parsing
    args = parse_arguments(command_line_parameters)

    # perform the verification experiment
    verify(args, command_line_parameters)
  except Exception as e:
    # track any exceptions as error logs (i.e., to get a time stamp)
    logger.error("During the execution, an exception was raised: %s" % e)
    raise

if __name__ == "__main__":
  main()