From 5595dfc064e823d0ff1a4f7686d7d21a981a941c Mon Sep 17 00:00:00 2001
From: Manuel Guenther <manuel.guenther@idiap.ch>
Date: Thu, 18 Jun 2015 16:07:06 +0200
Subject: [PATCH] Made parallel scripts usable for video experiments

---
 bob/bio/gmm/config/algorithm/gmm.py  |  3 ---
 bob/bio/gmm/config/algorithm/isv.py  |  2 --
 bob/bio/gmm/script/verify_gmm.py     | 21 ++++++++++++---------
 bob/bio/gmm/script/verify_isv.py     |  7 ++++---
 bob/bio/gmm/script/verify_ivector.py | 17 ++++++++++-------
 bob/bio/gmm/tools/gmm.py             | 11 ++++++-----
 bob/bio/gmm/tools/utils.py           | 26 ++++++++++++++++++++++++++
 buildout.cfg                         |  2 +-
 8 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/bob/bio/gmm/config/algorithm/gmm.py b/bob/bio/gmm/config/algorithm/gmm.py
index 592b30e..87b43ac 100644
--- a/bob/bio/gmm/config/algorithm/gmm.py
+++ b/bob/bio/gmm/config/algorithm/gmm.py
@@ -1,7 +1,4 @@
-#!/usr/bin/env python
-
 import bob.bio.gmm
-import numpy
 
 algorithm = bob.bio.gmm.algorithm.GMM(
     number_of_gaussians = 512,
diff --git a/bob/bio/gmm/config/algorithm/isv.py b/bob/bio/gmm/config/algorithm/isv.py
index 3ae069d..390e829 100644
--- a/bob/bio/gmm/config/algorithm/isv.py
+++ b/bob/bio/gmm/config/algorithm/isv.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 import bob.bio.gmm
 
 algorithm = bob.bio.gmm.algorithm.ISV(
diff --git a/bob/bio/gmm/script/verify_gmm.py b/bob/bio/gmm/script/verify_gmm.py
index c1683b2..8501ecc 100644
--- a/bob/bio/gmm/script/verify_gmm.py
+++ b/bob/bio/gmm/script/verify_gmm.py
@@ -55,7 +55,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
   tools.initialize_parallel_gmm(args)
 
   # assert that the algorithm is a GMM
-  if args.algorithm.__class__ not in (algorithm.GMM, algorithm.GMMRegular):
+  if tools.base(args.algorithm).__class__ not in (algorithm.GMM, algorithm.GMMRegular):
     raise ValueError("The given algorithm %s is not a (pure) GMM algorithm" % type(args.algorithm))
 
   return args
@@ -63,6 +63,8 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
 def add_gmm_jobs(args, job_ids, deps, submitter):
   """Adds all GMM-related jobs."""
 
+  algorithm = tools.base(args.algorithm)
+
   # KMeans
   if not args.skip_kmeans:
     # initialization
@@ -75,7 +77,7 @@ def add_gmm_jobs(args, job_ids, deps, submitter):
       deps.append(job_ids['kmeans-init'])
 
     # several iterations of E and M steps
-    for iteration in range(args.kmeans_start_iteration, args.algorithm.kmeans_training_iterations):
+    for iteration in range(args.kmeans_start_iteration, algorithm.kmeans_training_iterations):
       # E-step
       job_ids['kmeans-e-step'] = submitter.submit(
               '--sub-task kmeans-e-step --iteration %d' % iteration,
@@ -106,7 +108,7 @@ def add_gmm_jobs(args, job_ids, deps, submitter):
       deps.append(job_ids['gmm-init'])
 
     # several iterations of E and M steps
-    for iteration in range(args.gmm_start_iteration, args.algorithm.gmm_training_iterations):
+    for iteration in range(args.gmm_start_iteration, algorithm.gmm_training_iterations):
       # E-step
       job_ids['gmm-e-step'] = submitter.submit(
               '--sub-task gmm-e-step --iteration %d' % iteration,
@@ -138,6 +140,7 @@ def execute(args):
     return True
 
   # now, check what we can do
+  algorithm = tools.base(args.algorithm)
 
   # the file selector object
   fs = tools.FileSelector.instance()
@@ -145,7 +148,7 @@ def execute(args):
   # train the feature projector
   if args.sub_task == 'kmeans-init':
     tools.kmeans_initialize(
-        args.algorithm,
+        algorithm,
         args.extractor,
         args.limit_training_data,
         force = args.force)
@@ -153,7 +156,7 @@ def execute(args):
   # train the feature projector
   elif args.sub_task == 'kmeans-e-step':
     tools.kmeans_estep(
-        args.algorithm,
+        algorithm,
         args.extractor,
         args.iteration,
         indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
@@ -162,7 +165,7 @@ def execute(args):
   # train the feature projector
   elif args.sub_task == 'kmeans-m-step':
     tools.kmeans_mstep(
-        args.algorithm,
+        algorithm,
         args.iteration,
         number_of_parallel_jobs = args.grid.number_of_projection_jobs,
         clean = args.clean_intermediate,
@@ -170,7 +173,7 @@ def execute(args):
 
   elif args.sub_task == 'gmm-init':
     tools.gmm_initialize(
-        args.algorithm,
+        algorithm,
         args.extractor,
         args.limit_training_data,
         force = args.force)
@@ -178,7 +181,7 @@ def execute(args):
   # train the feature projector
   elif args.sub_task == 'gmm-e-step':
     tools.gmm_estep(
-        args.algorithm,
+        algorithm,
         args.extractor,
         args.iteration,
         indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
@@ -187,7 +190,7 @@ def execute(args):
   # train the feature projector
   elif args.sub_task == 'gmm-m-step':
     tools.gmm_mstep(
-        args.algorithm,
+        algorithm,
         args.iteration,
         number_of_parallel_jobs = args.grid.number_of_projection_jobs,
         clean = args.clean_intermediate,
diff --git a/bob/bio/gmm/script/verify_isv.py b/bob/bio/gmm/script/verify_isv.py
index cd65f55..1c55f65 100644
--- a/bob/bio/gmm/script/verify_isv.py
+++ b/bob/bio/gmm/script/verify_isv.py
@@ -55,7 +55,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
   tools.initialize_parallel_gmm(args, sub_module = 'isv')
 
   # assert that the algorithm is a GMM
-  if args.algorithm.__class__ != algorithm.ISV:
+  if tools.base(args.algorithm).__class__ != algorithm.ISV:
     raise ValueError("The given algorithm %s is not a (pure) ISV algorithm" % type(args.algorithm))
 
   return args
@@ -101,13 +101,14 @@ def execute(args):
     return True
 
   # now, check what we can do
+  algorithm = tools.base(args.algorithm)
 
   # the file selector object
   fs = tools.FileSelector.instance()
 
   if args.sub_task == 'gmm-project':
     tools.gmm_project(
-        args.algorithm,
+        algorithm,
         args.extractor,
         indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
         force = args.force)
@@ -115,7 +116,7 @@ def execute(args):
   # train the feature projector
   elif args.sub_task == 'train-isv':
     tools.train_isv(
-        args.algorithm,
+        algorithm,
         force = args.force)
 
   else:
diff --git a/bob/bio/gmm/script/verify_ivector.py b/bob/bio/gmm/script/verify_ivector.py
index 5081c5a..fbe2f87 100644
--- a/bob/bio/gmm/script/verify_ivector.py
+++ b/bob/bio/gmm/script/verify_ivector.py
@@ -55,7 +55,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
   tools.initialize_parallel_gmm(args, sub_module = 'ivector')
 
   # assert that the algorithm is a GMM
-  if args.algorithm.__class__ != algorithm.IVector:
+  if tools.base(args.algorithm).__class__ != algorithm.IVector:
     raise ValueError("The given algorithm %s is not a (pure) IVector algorithm" % type(args.algorithm))
 
   return args
@@ -69,6 +69,8 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
   job_ids, deps = add_gmm_jobs(args, job_ids, deps, submitter)
 
   # now, add the extra steps for ivector
+  algorithm = tools.base(args.algorithm)
+
   if not args.skip_ivector:
     # gmm projection
     job_ids['gmm-projection'] = submitter.submit(
@@ -80,7 +82,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
     deps.append(job_ids['gmm-projection'])
 
     # several iterations of E and M steps
-    for iteration in range(args.tv_start_iteration, args.algorithm.tv_training_iterations):
+    for iteration in range(args.tv_start_iteration, algorithm.tv_training_iterations):
       # E-step
       job_ids['ivector-e-step'] = submitter.submit(
               '--sub-task ivector-e-step --iteration %d' % iteration,
@@ -131,20 +133,21 @@ def execute(args):
     return True
 
   # now, check what we can do
+  algorithm = tools.base(args.algorithm)
 
   # the file selector object
   fs = tools.FileSelector.instance()
 
   if args.sub_task == 'gmm-project':
     tools.gmm_project(
-        args.algorithm,
+        algorithm,
         args.extractor,
         indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
         force = args.force)
 
   elif args.sub_task == 'ivector-e-step':
     tools.ivector_estep(
-        args.algorithm,
+        algorithm,
         args.iteration,
         indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
         force = args.force)
@@ -152,7 +155,7 @@ def execute(args):
   # train the feature projector
   elif args.sub_task == 'ivector-m-step':
     tools.ivector_mstep(
-        args.algorithm,
+        algorithm,
         args.iteration,
         number_of_parallel_jobs = args.grid.number_of_projection_jobs,
         clean = args.clean_intermediate,
@@ -160,13 +163,13 @@ def execute(args):
 
   elif args.sub_task == 'ivector-project':
     tools.ivector_project(
-        args.algorithm,
+        algorithm,
         indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
         force = args.force)
 
   elif args.sub_task == 'train-whitener':
     tools.train_whitener(
-        args.algorithm,
+        algorithm,
         force = args.force)
 
   else:
diff --git a/bob/bio/gmm/tools/gmm.py b/bob/bio/gmm/tools/gmm.py
index fba7ee8..dd18cce 100644
--- a/bob/bio/gmm/tools/gmm.py
+++ b/bob/bio/gmm/tools/gmm.py
@@ -9,6 +9,7 @@ logger = logging.getLogger("bob.bio.gmm")
 
 from bob.bio.base.tools.FileSelector import FileSelector
 from bob.bio.base import utils, tools
+from .utils import read_feature
 
 
 def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
@@ -23,7 +24,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
     # read data
     logger.info("UBM training: initializing kmeans")
     training_list = utils.selected_elements(fs.training_list('extracted', 'train_projector'), limit_data)
-    data = numpy.vstack([extractor.read_feature(feature_file) for feature_file in training_list])
+    data = numpy.vstack([read_feature(extractor, feature_file) for feature_file in training_list])
 
     # Perform KMeans initialization
     kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1])
@@ -55,7 +56,7 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
     logger.info("UBM training: KMeans E-Step round %d from range(%d, %d)", iteration, *indices)
 
     # read data
-    data = numpy.vstack([extractor.read_feature(training_list[index]) for index in range(indices[0], indices[1])])
+    data = numpy.vstack([read_feature(extractor, training_list[index]) for index in range(indices[0], indices[1])])
 
     # Performs the E-step
     trainer = algorithm.kmeans_trainer
@@ -169,7 +170,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
 
     # read features
     training_list = utils.selected_elements(fs.training_list('extracted', 'train_projector'), limit_data)
-    data = numpy.vstack([extractor.read_feature(feature_file) for feature_file in training_list])
+    data = numpy.vstack([read_feature(extractor, feature_file) for feature_file in training_list])
 
     # get means and variances of kmeans result
     kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file))
@@ -209,7 +210,7 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False):
     logger.info("UBM training: GMM E-Step from range(%d, %d)", indices)
 
     # read data
-    data = numpy.vstack([extractor.read_feature(training_list[index]) for index in range(indices[0], indices[1])])
+    data = numpy.vstack([read_feature(extractor, training_list[index]) for index in range(indices[0], indices[1])])
     trainer = algorithm.ubm_trainer
     trainer.initialize(gmm_machine, None)
 
@@ -299,7 +300,7 @@ def gmm_project(algorithm, extractor, indices, force=False):
 
     if not utils.check_file(projected_file, force):
       # load feature
-      feature = extractor.read_feature(feature_file)
+      feature = read_feature(extractor, feature_file)
       # project feature
       projected = algorithm.project_ubm(feature)
       # write it
diff --git a/bob/bio/gmm/tools/utils.py b/bob/bio/gmm/tools/utils.py
index fe773e0..146fb50 100644
--- a/bob/bio/gmm/tools/utils.py
+++ b/bob/bio/gmm/tools/utils.py
@@ -1,4 +1,5 @@
 import bob.bio.base
+import numpy
 
 def add_jobs(args, submitter, local_job_adder):
   """Adds all (desired) jobs of the tool chain to the grid, or to the local list to be executed."""
@@ -47,3 +48,28 @@ def add_jobs(args, submitter, local_job_adder):
     setattr(args, "skip_%s" % key, original_skips[key])
 
   return job_ids
+
+
+def is_video_extension(algorithm):  # True only if bob.bio.video imports and ``algorithm`` is an instance of its Algorithm class
+  try:
+    import bob.bio.video  # optional dependency, imported lazily inside the function
+    if isinstance(algorithm, bob.bio.video.algorithm.Algorithm):
+      return True
+  except ImportError:  # bob.bio.video could not be imported: treat as "not a video extension"
+    pass
+  return False  # either not a video algorithm, or the video package is unavailable
+
+def base(algorithm):
+  """Returns the wrapped ``algorithm.algorithm`` if ``algorithm`` is a video extension (see ``is_video_extension``); otherwise returns ``algorithm`` itself"""
+  return algorithm.algorithm if is_video_extension(algorithm) else algorithm
+
+def read_feature(extractor, feature_file):  # reads via ``extractor``; for video extractors the frames are stacked with numpy.vstack
+  feature = extractor.read_feature(feature_file)
+  try:
+    import bob.bio.video  # optional dependency, imported lazily inside the function
+    if isinstance(extractor, bob.bio.video.extractor.Extractor):
+      assert isinstance(feature, bob.bio.video.FrameContainer)  # sanity check only; asserts are skipped under ``python -O``
+      return numpy.vstack([frame for _,frame,_ in feature])  # keeps the middle element of each triple yielded by the container
+  except ImportError:  # bob.bio.video could not be imported: fall through to the raw feature
+    pass
+  return feature  # non-video extractor (or no bob.bio.video): returned exactly as read
diff --git a/buildout.cfg b/buildout.cfg
index 3771d80..0710f05 100644
--- a/buildout.cfg
+++ b/buildout.cfg
@@ -14,7 +14,7 @@ develop = src/bob.bio.base
           .
          
 ; options for bob.buildout
-debug = true
+debug = false
 verbose = true
 newest = false
 
-- 
GitLab