#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Guenther <Manuel.Guenther@idiap.ch>
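#
# This script runs a parallelized GMM verification experiment: KMeans
# initialization and E/M iterations, followed by GMM initialization and E/M
# iterations, and finally projection, enrollment and score computation.
# Either the --grid or the --parallel option is required.
#
# Illustrative invocation (a sketch only; the resource names are placeholders
# and the long option names follow the usual bob.bio.base conventions, so
# adjust them to your installation):
#
#   verify_gmm.py --database <database> --preprocessor <preprocessor> \
#       --extractor <extractor> --algorithm gmm --parallel 4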
from __future__ import print_function

import sys
import argparse

import logging
logger = logging.getLogger("bob.bio.gmm")

import bob.bio.base
from .. import tools, algorithm
from bob.bio.base import tools as base_tools


def parse_arguments(command_line_parameters, exclude_resources_from = []):
  """This function parses the given options (which by default are the command line options). If exclude_resources_from is specified (as a list), the resources from the given packages are not listed in the help message."""
  # set up command line parser
  parsers = base_tools.command_line_parser(exclude_resources_from = exclude_resources_from)

  # add GMM-related options
  tools.add_parallel_gmm_options(parsers)

  # override some parameters
  parsers['config'].add_argument('-a', '--algorithm', metavar = 'x', nargs = '+', default = ['gmm'],
      help = 'The recognition algorithm; only GMM-related algorithms are allowed')


  # Add sub-tasks that can be executed by this script
  parser = parsers['main']
  parser.add_argument('--sub-task',
      choices = ('preprocess', 'train-extractor', 'extract', 'normalize-features', 'kmeans-init', 'kmeans-e-step', 'kmeans-m-step', 'gmm-init', 'gmm-e-step', 'gmm-m-step', 'project', 'enroll', 'compute-scores', 'concatenate'),
      help = argparse.SUPPRESS) #'Executes a subtask (FOR INTERNAL USE ONLY!!!)'
  parser.add_argument('--iteration', type = int,
      help = argparse.SUPPRESS) #'The current iteration of the KMeans or GMM training'
  parser.add_argument('--model-type', choices = ['N', 'T'],
      help = argparse.SUPPRESS) #'Which type of models to generate (Normal or TModels)'
  parser.add_argument('--score-type', choices = ['A', 'B', 'C', 'D', 'Z'],
      help = argparse.SUPPRESS) #'The type of scores that should be computed'
  parser.add_argument('--group',
      help = argparse.SUPPRESS) #'The group for which the current action should be performed'

  # now that we have set up everything, get the command line arguments
  args = base_tools.initialize(parsers, command_line_parameters,
      skips = ['preprocessing', 'extractor-training', 'extraction', 'normalization', 'kmeans', 'gmm', 'projection', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']
  )

  if args.grid is None and args.parallel is None:
    raise ValueError("To be able to run the parallelized ISV script, either the --grid or the --parallel option need to be specified!")

  args.skip_projector_training = True

  # and add the GMM-related parameters
  tools.initialize_parallel_gmm(args)

  # assert that the algorithm is a GMM
  if tools.base(args.algorithm).__class__ not in (algorithm.GMM, algorithm.GMMRegular):
    raise ValueError("The given algorithm %s is not a (pure) GMM algorithm" % type(args.algorithm))

  # check if one of the internal parameters is given without the sub-task
  if args.sub_task is None:
    if args.iteration is not None: raise ValueError("The option --iteration is an internal option and cannot be used to define experiments")
    if args.model_type is not None: raise ValueError("The option --model-type is an internal option and cannot be used to define experiments")
    if args.score_type is not None: raise ValueError("The option --score-type is an internal option and cannot be used to define experiments")
    if args.group is not None: raise ValueError("The option --group is an internal option and cannot be used to define experiments; did you mean to use --groups?")

  return args

def add_gmm_jobs(args, job_ids, deps, submitter):
  """Adds all GMM-related jobs."""

  algorithm = tools.base(args.algorithm)

  # KMeans
  if not args.skip_kmeans:
    # initialization
    if not args.kmeans_start_iteration:
      job_ids['kmeans-init'] = submitter.submit(
              '--sub-task kmeans-init',
              name = 'k-init',
              dependencies = deps,
              **args.grid.training_queue)
      deps.append(job_ids['kmeans-init'])

    # several iterations of E and M steps
    for iteration in range(args.kmeans_start_iteration, algorithm.kmeans_training_iterations):
      # E-step
      job_ids['kmeans-e-step'] = submitter.submit(
              '--sub-task kmeans-e-step --iteration %d' % iteration,
              name='k-e-%d' % iteration,
              number_of_parallel_jobs = args.grid.number_of_projection_jobs,
              dependencies = [job_ids['kmeans-m-step']] if iteration != args.kmeans_start_iteration else deps,
              **args.grid.projection_queue)

      # M-step
      job_ids['kmeans-m-step'] = submitter.submit(
              '--sub-task kmeans-m-step --iteration %d' % iteration,
              name='k-m-%d' % iteration,
              dependencies = [job_ids['kmeans-e-step']],
              **args.grid.training_queue)

    # add a dependency on the last M-step
    deps.append(job_ids['kmeans-m-step'])

  # GMM
  if not args.skip_gmm:
    # initialization
    if not args.gmm_start_iteration:
      job_ids['gmm-init'] = submitter.submit(
              '--sub-task gmm-init',
              name = 'g-init',
              dependencies = deps,
              **args.grid.training_queue)
      deps.append(job_ids['gmm-init'])

    # several iterations of E and M steps
    for iteration in range(args.gmm_start_iteration, algorithm.gmm_training_iterations):
      # E-step
      job_ids['gmm-e-step'] = submitter.submit(
              '--sub-task gmm-e-step --iteration %d' % iteration,
              name='g-e-%d' % iteration,
              number_of_parallel_jobs = args.grid.number_of_projection_jobs,
              dependencies = [job_ids['gmm-m-step']] if iteration != args.gmm_start_iteration else deps,
              **args.grid.projection_queue)

      # M-step
      job_ids['gmm-m-step'] = submitter.submit(
              '--sub-task gmm-m-step --iteration %d' % iteration,
              name='g-m-%d' % iteration,
              dependencies = [job_ids['gmm-e-step']],
              **args.grid.training_queue)

    # add a dependency on the last M-step
    deps.append(job_ids['gmm-m-step'])
  return job_ids, deps




def execute(args):
  """Run the desired job of the tool chain that is specified on command line.
  This job might be executed either in the grid, or locally."""
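  # Only the GMM-specific sub-tasks (the kmeans-* and gmm-* steps) are handled
  # here; all other sub-tasks are delegated to the base script, and False is
  # returned for unknown keywords so that the caller can report the error.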

  # first, let the base script decide if it knows how to execute the job
  if bob.bio.base.script.verify.execute(args):
    return True

  # now, check what we can do
  algorithm = tools.base(args.algorithm)

  # the file selector object
  fs = tools.FileSelector.instance()

  # initialize the KMeans training
  if args.sub_task == 'kmeans-init':
    tools.kmeans_initialize(
        algorithm,
        args.extractor,
        args.limit_training_data,
        allow_missing_files = args.allow_missing_files,
        force = args.force)

  # run one E-step of the KMeans training
  elif args.sub_task == 'kmeans-e-step':
    tools.kmeans_estep(
        algorithm,
        args.extractor,
        args.iteration,
        allow_missing_files = args.allow_missing_files,
        indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
        force = args.force)

  # run one M-step of the KMeans training
  elif args.sub_task == 'kmeans-m-step':
    tools.kmeans_mstep(
        algorithm,
        args.iteration,
        number_of_parallel_jobs = args.grid.number_of_projection_jobs,
        clean = args.clean_intermediate,
        force = args.force)

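  # initialize the GMM training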
  elif args.sub_task == 'gmm-init':
    tools.gmm_initialize(
        algorithm,
        args.extractor,
        args.limit_training_data,
        allow_missing_files = args.allow_missing_files,
        force = args.force)

  # run one E-step of the GMM training
  elif args.sub_task == 'gmm-e-step':
    tools.gmm_estep(
        algorithm,
        args.extractor,
        args.iteration,
        allow_missing_files = args.allow_missing_files,
        indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
        force = args.force)

  # run one M-step of the GMM training
  elif args.sub_task == 'gmm-m-step':
    tools.gmm_mstep(
        algorithm,
        args.iteration,
        number_of_parallel_jobs = args.grid.number_of_projection_jobs,
        clean = args.clean_intermediate,
        force = args.force)
  else:
    # Not our keyword...
    return False
  return True



def verify(args, command_line_parameters, external_fake_job_id = 0):
  """This is the main entry point for computing verification experiments.
  You just have to specify configurations for any of the steps of the toolchain, which are:
  -- the database
  -- the preprocessing
  -- feature extraction
  -- the recognition algorithm
  -- and the grid configuration.
  Additionally, you can skip parts of the toolchain by selecting proper --skip-... parameters.
  If your probe files are not too big, you can also specify the --preload-probes switch to speed up the score computation.
  If files should be re-generated, please specify the --force option (might be combined with the --skip-... options)."""
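  # The returned dictionary maps job names to the ids of the submitted grid
  # jobs; it is empty when a sub-task was executed or when the jobs were run
  # directly by the local scheduler.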


  # as the main entry point, check whether the sub-task is specified
  if args.sub_task is not None:
    # execute the desired sub-task
    if not execute(args):
      raise ValueError("The specified --sub-task '%s' is not known to the system" % args.sub_task)
    return {}
  else:
    # add jobs
    submitter = base_tools.GridSubmission(args, command_line_parameters, executable = 'verify_gmm.py', first_fake_job_id = 0)
    retval = tools.add_jobs(args, submitter, local_job_adder = add_gmm_jobs)
    base_tools.write_info(args, command_line_parameters, submitter.executable)

    if args.grid.is_local() and args.run_local_scheduler:
      if args.dry_run:
        print ("Would have started the local scheduler to run the experiments with parallel jobs")
      else:
        # start the local jman daemon
        submitter.execute_local()
      return {}

    else:
      # return job ids as a dictionary
      return retval


def main(command_line_parameters = None):
  """Executes the main function"""
  try:
    # do the command line parsing
    args = parse_arguments(command_line_parameters)

    # perform the verification experiment
    verify(args, command_line_parameters)
  except Exception as e:
    # track any exceptions as error logs (i.e., to get a time stamp)
    logger.error("During the execution, an exception was raised: %s" % e)
    raise

if __name__ == "__main__":
  main()