Commit fb3321c3 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Propagated --allow-missing-files to the UBM training

propagating allow missing files

propagating allow missing files

Fixed small bug

Fixed the unit test for the --allow-missing-files option

Propagated --allow-missing-files to ISV and iVector training and created tests for all combinations

Removed logger

Removed the function is_missing_file

Removed the function is_missing_file
parent e8d4b0b2
Pipeline #13518 passed with stages
in 9 minutes and 55 seconds
......@@ -158,6 +158,7 @@ def execute(args):
algorithm,
args.extractor,
args.limit_training_data,
allow_missing_files = args.allow_missing_files,
force = args.force)
# train the feature projector
......@@ -166,6 +167,7 @@ def execute(args):
algorithm,
args.extractor,
args.iteration,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force)
......@@ -183,6 +185,7 @@ def execute(args):
algorithm,
args.extractor,
args.limit_training_data,
allow_missing_files = args.allow_missing_files,
force = args.force)
# train the feature projector
......@@ -191,6 +194,7 @@ def execute(args):
algorithm,
args.extractor,
args.iteration,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force)
......
......@@ -83,6 +83,7 @@ def add_isv_jobs(args, job_ids, deps, submitter):
name = 'pro-gmm',
number_of_parallel_jobs = args.grid.number_of_projection_jobs,
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.projection_queue)
deps.append(job_ids['gmm-projection'])
......@@ -90,6 +91,7 @@ def add_isv_jobs(args, job_ids, deps, submitter):
'--sub-task train-isv',
name = 'train-isv',
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.training_queue)
deps.append(job_ids['isv-training'])
......@@ -118,12 +120,14 @@ def execute(args):
algorithm,
args.extractor,
indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
allow_missing_files = args.allow_missing_files,
force = args.force)
# train the feature projector
elif args.sub_task == 'train-isv':
tools.train_isv(
algorithm,
allow_missing_files = args.allow_missing_files,
force = args.force)
else:
......
......@@ -96,6 +96,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name='i-e-%d' % iteration,
number_of_parallel_jobs = args.grid.number_of_projection_jobs,
dependencies = [job_ids['ivector-m-step']] if iteration != args.tv_start_iteration else deps,
allow_missing_files = args.allow_missing_files,
**args.grid.projection_queue)
# M-step
......@@ -114,6 +115,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name = 'pro-ivector',
number_of_parallel_jobs = args.grid.number_of_projection_jobs,
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.projection_queue)
deps.append(job_ids['ivector-projection'])
......@@ -123,6 +125,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
'--sub-task train-whitener',
name = 'train-whitener',
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.training_queue)
deps.append(job_ids['whitener-training'])
......@@ -133,6 +136,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name = 'whitened',
number_of_parallel_jobs = args.grid.number_of_projection_jobs,
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.projection_queue)
deps.append(job_ids['whitening-projection'])
......@@ -142,6 +146,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
'--sub-task train-lda',
name = 'train-lda',
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.training_queue)
deps.append(job_ids['lda-training'])
......@@ -152,6 +157,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name = 'lda_projection',
number_of_parallel_jobs = args.grid.number_of_projection_jobs,
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.projection_queue)
deps.append(job_ids['lda-projection'])
......@@ -161,6 +167,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
'--sub-task train-wccn',
name = 'train-wccn',
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.training_queue)
deps.append(job_ids['wccn-training'])
......@@ -171,6 +178,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name = 'wccn_projection',
number_of_parallel_jobs = args.grid.number_of_projection_jobs,
dependencies = deps,
allow_missing_files = args.allow_missing_files,
**args.grid.projection_queue)
deps.append(job_ids['wccn-projection'])
......@@ -179,6 +187,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
job_ids['plda-training'] = submitter.submit(
'--sub-task train-plda',
name = 'train-plda',
allow_missing_files = args.allow_missing_files,
dependencies = deps,
**args.grid.training_queue)
deps.append(job_ids['plda-training'])
......@@ -216,12 +225,14 @@ def execute(args):
algorithm,
args.extractor,
indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
allow_missing_files = args.allow_missing_files,
force = args.force)
elif args.sub_task == 'ivector-e-step':
tools.ivector_estep(
algorithm,
args.iteration,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force)
......@@ -237,17 +248,20 @@ def execute(args):
elif args.sub_task == 'ivector-projection':
tools.ivector_project(
algorithm,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force)
elif args.sub_task == 'train-whitener':
tools.train_whitener(
algorithm,
allow_missing_files = args.allow_missing_files,
force = args.force)
elif args.sub_task == 'whitening-projection':
tools.whitening_project(
algorithm,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force)
......@@ -255,12 +269,14 @@ def execute(args):
if algorithm.use_lda:
tools.train_lda(
algorithm,
allow_missing_files = args.allow_missing_files,
force = args.force)
elif args.sub_task == 'lda-projection':
if algorithm.use_lda:
tools.lda_project(
algorithm,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force)
......@@ -268,12 +284,14 @@ def execute(args):
if algorithm.use_wccn:
tools.train_wccn(
algorithm,
allow_missing_files = args.allow_missing_files,
force = args.force)
elif args.sub_task == 'wccn-projection':
if algorithm.use_wccn:
tools.wccn_project(
algorithm,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force)
......@@ -281,6 +299,7 @@ def execute(args):
if algorithm.use_plda:
tools.train_plda(
algorithm,
allow_missing_files = args.allow_missing_files,
force = args.force)
elif args.sub_task == 'save-projector':
......
......@@ -22,7 +22,7 @@ from bob.bio.base.script.verify import main
data_dir = pkg_resources.resource_filename('bob.bio.gmm', 'test/data')
def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('scores',''), executable = main):
def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('scores',''), executable = main, allow_missing_files=False):
try:
executable(parameters)
......@@ -52,6 +52,8 @@ def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('sco
assert d[0].shape == d[1].shape
# assert that the data order is still correct
assert (d[0][:,0:3] == d[1][:, 0:3]).all()
if not allow_missing_files:
# assert that the values are OK
assert numpy.allclose(d[0][:,3].astype(float), d[1][:,3].astype(float), 1e-5)
......@@ -77,6 +79,26 @@ def test_gmm_sequential():
_verify(parameters, test_dir, 'test_gmm_sequential', ref_modifier='-gmm')
def test_gmm_sequential_missingfiles():
    """Runs the sequential GMM tool chain while roughly half of the inputs
    are simulated as missing, checking that --allow-missing-files works."""
    tmp = tempfile.mkdtemp(prefix='bobtest_')
    # DummyPreprocessor(return_none=True, probability_of_none=0.5) drops about
    # 50% of the samples, which exercises the missing-file code paths.
    args = [
        '-d', 'dummy',
        '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)',
        '-e', 'dummy2d',
        '-a', 'bob.bio.gmm.algorithm.GMM(2, 2, 2)',
        '--zt-norm',
        '-vs', 'test_gmm_sequential',
        '--temp-directory', tmp,
        '--result-directory', tmp,
        '--preferred-package', 'bob.bio.gmm',
        '--allow-missing-files',
    ]
    _verify(args, tmp, 'test_gmm_sequential', ref_modifier='-gmm', allow_missing_files=True)
@bob.bio.base.test.utils.grid_available
def test_gmm_parallel():
from bob.bio.gmm.script.verify_gmm import main
......@@ -100,6 +122,30 @@ def test_gmm_parallel():
_verify(parameters, test_dir, 'test_gmm_parallel', executable=main, ref_modifier='-gmm')
@bob.bio.base.test.utils.grid_available
def test_gmm_parallel_missingfiles():
    """Runs the parallel (local-grid) GMM tool chain with ~50% of the input
    files missing; the submitted jobs must propagate --allow-missing-files."""
    from bob.bio.gmm.script.verify_gmm import main
    tmp = tempfile.mkdtemp(prefix='bobtest_')
    db_file = os.path.join(tmp, "submitted.sql3")
    # the dummy preprocessor randomly drops samples to simulate missing files
    args = [
        '-d', 'dummy',
        '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)',
        '-e', 'dummy2d',
        '-a', 'bob.bio.gmm.algorithm.GMM(2, 2, 2)', '--import', 'bob.bio.gmm', 'bob.io.image',
        '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', db_file, '--run-local-scheduler', '--stop-on-failure',
        '--clean-intermediate',
        '--zt-norm',
        '-vs', 'test_gmm_parallel',
        '--temp-directory', tmp,
        '--result-directory', tmp,
        '--preferred-package', 'bob.bio.gmm',
        '--allow-missing-files',
    ]
    _verify(args, tmp, 'test_gmm_parallel', executable=main, ref_modifier='-gmm', allow_missing_files=True)
def test_isv_sequential():
test_dir = tempfile.mkdtemp(prefix='bobtest_')
# define dummy parameters
......@@ -118,6 +164,25 @@ def test_isv_sequential():
_verify(parameters, test_dir, 'test_isv_sequential', ref_modifier='-isv')
def test_isv_sequential_missingfiles():
    """Runs the sequential ISV tool chain with roughly half of the samples
    missing, verifying the --allow-missing-files option end to end."""
    tmp = tempfile.mkdtemp(prefix='bobtest_')
    # the dummy preprocessor returns None for ~50% of the samples
    args = [
        '-d', 'dummy',
        '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)',
        '-e', 'dummy2d',
        '-a', 'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)',
        '--zt-norm',
        '-vs', 'test_isv_sequential',
        '--temp-directory', tmp,
        '--result-directory', tmp,
        '--preferred-package', 'bob.bio.gmm',
        '--allow-missing-files',
    ]
    _verify(args, tmp, 'test_isv_sequential', ref_modifier='-isv', allow_missing_files=True)
@bob.bio.base.test.utils.grid_available
def test_isv_parallel():
from bob.bio.gmm.script.verify_isv import main
......@@ -141,6 +206,30 @@ def test_isv_parallel():
_verify(parameters, test_dir, 'test_isv_parallel', executable=main, ref_modifier='-isv')
@bob.bio.base.test.utils.grid_available
def test_isv_parallel_missing_files():
    """Runs the parallel (local-grid) ISV tool chain with ~50% of the input
    files missing; grid jobs must honor --allow-missing-files."""
    from bob.bio.gmm.script.verify_isv import main
    tmp = tempfile.mkdtemp(prefix='bobtest_')
    db_file = os.path.join(tmp, "submitted.sql3")
    # the dummy preprocessor randomly drops samples to simulate missing files
    args = [
        '-d', 'dummy',
        '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)',
        '-e', 'dummy2d',
        '-a', 'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)', '--import', 'bob.bio.gmm', 'bob.io.image',
        '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', db_file, '--run-local-scheduler', '--stop-on-failure',
        '--clean-intermediate',
        '--zt-norm',
        '-vs', 'test_isv_parallel',
        '--temp-directory', tmp,
        '--result-directory', tmp,
        '--preferred-package', 'bob.bio.gmm',
        '--allow-missing-files',
    ]
    _verify(args, tmp, 'test_isv_parallel', executable=main, ref_modifier='-isv', allow_missing_files=True)
def test_ivector_cosine_sequential():
test_dir = tempfile.mkdtemp(prefix='bobtest_')
# define dummy parameters
......@@ -159,6 +248,25 @@ def test_ivector_cosine_sequential():
_verify(parameters, test_dir, 'test_ivector_cosine_sequential', ref_modifier='-ivector-cosine')
def test_ivector_cosine_sequential_missing_files():
    """Runs the sequential i-vector (cosine scoring) tool chain with roughly
    half of the samples missing, verifying --allow-missing-files."""
    tmp = tempfile.mkdtemp(prefix='bobtest_')
    # the dummy preprocessor returns None for ~50% of the samples
    args = [
        '-d', 'dummy',
        '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)',
        '-e', 'dummy2d',
        '-a', 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)',
        '--zt-norm',
        '-vs', 'test_ivector_cosine_sequential',
        '--temp-directory', tmp,
        '--result-directory', tmp,
        '--preferred-package', 'bob.bio.gmm',
        '--allow-missing-files',
    ]
    _verify(args, tmp, 'test_ivector_cosine_sequential', ref_modifier='-ivector-cosine', allow_missing_files=True)
@bob.bio.base.test.utils.grid_available
def test_ivector_cosine_parallel():
from bob.bio.gmm.script.verify_ivector import main
......@@ -181,6 +289,32 @@ def test_ivector_cosine_parallel():
_verify(parameters, test_dir, 'test_ivector_cosine_parallel', executable=main, ref_modifier='-ivector-cosine')
@bob.bio.base.test.utils.grid_available
def test_ivector_cosine_parallel_missing_files():
    """Runs the parallel (local-grid) i-vector cosine tool chain with ~50% of
    the input files missing; grid jobs must honor --allow-missing-files."""
    from bob.bio.gmm.script.verify_ivector import main
    tmp = tempfile.mkdtemp(prefix='bobtest_')
    db_file = os.path.join(tmp, "submitted.sql3")
    # the dummy preprocessor randomly drops samples to simulate missing files
    args = [
        '-d', 'dummy',
        '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)',
        '-e', 'dummy2d',
        '-a', 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)', '--import', 'bob.bio.gmm', 'bob.io.image',
        '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', db_file, '--run-local-scheduler', '--stop-on-failure',
        '--clean-intermediate',
        '--zt-norm',
        '-vs', 'test_ivector_cosine_parallel',
        '--temp-directory', tmp,
        '--result-directory', tmp,
        '--preferred-package', 'bob.bio.gmm',
        '--allow-missing-files',
    ]
    _verify(args, tmp, 'test_ivector_cosine_parallel', executable=main, ref_modifier='-ivector-cosine', allow_missing_files=True)
def test_ivector_lda_wccn_plda_sequential():
test_dir = tempfile.mkdtemp(prefix='bobtest_')
# define dummy parameters
......@@ -221,6 +355,30 @@ def test_ivector_lda_wccn_plda_parallel():
_verify(parameters, test_dir, 'test_ivector_lda_wccn_plda_parallel', executable=main, ref_modifier='-ivector-lda-wccn-plda')
@bob.bio.base.test.utils.grid_available
def test_ivector_lda_wccn_plda_parallel_missing_files():
    """Runs the parallel i-vector tool chain with LDA, WCCN and PLDA enabled
    while ~50% of the input files are missing (--allow-missing-files)."""
    from bob.bio.gmm.script.verify_ivector import main
    tmp = tempfile.mkdtemp(prefix='bobtest_')
    db_file = os.path.join(tmp, "submitted.sql3")
    # the dummy preprocessor randomly drops samples to simulate missing files
    args = [
        '-d', 'dummy',
        '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)',
        '-e', 'dummy2d',
        '-a', 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2, use_lda=True, use_wccn=True, use_plda=True, lda_dim=2, plda_dim_F=2, plda_dim_G=2, plda_training_iterations=2)', '--import', 'bob.bio.gmm', 'bob.io.image',
        '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', db_file, '--run-local-scheduler', '--stop-on-failure',
        '--clean-intermediate',
        '--zt-norm',
        '-vs', 'test_ivector_lda_wccn_plda_parallel',
        '--temp-directory', tmp,
        '--result-directory', tmp,
        '--preferred-package', 'bob.bio.gmm',
        '--allow-missing-files',
    ]
    _verify(args, tmp, 'test_ivector_lda_wccn_plda_parallel', executable=main, ref_modifier='-ivector-lda-wccn-plda', allow_missing_files=True)
def test_internal_raises():
test_dir = tempfile.mkdtemp(prefix='bobtest_')
test_database = os.path.join(test_dir, "submitted.sql3")
......@@ -253,3 +411,4 @@ def test_internal_raises():
nose.tools.assert_raises(ValueError, script, internal)
shutil.rmtree(test_dir)
......@@ -4,5 +4,6 @@ from .gmm import *
from .isv import *
from .ivector import *
# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
......@@ -12,7 +12,7 @@ from bob.bio.base import utils, tools
from .utils import read_feature
def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
def kmeans_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False):
"""Initializes the K-Means training (non-parallel)."""
fs = FileSelector.instance()
......@@ -27,7 +27,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
# read the features
reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(reader, training_list)
data = utils.vstack_features(reader, training_list, allow_missing_files=allow_missing_files)
# Perform KMeans initialization
kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1])
......@@ -38,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
logger.info("UBM training: saved initial KMeans machine to '%s'", output_file)
def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
def kmeans_estep(algorithm, extractor, iteration, indices, force=False, allow_missing_files = False):
"""Performs a single E-step of the K-Means algorithm (parallel)"""
if indices[0] >= indices[1]:
return
......@@ -62,7 +62,8 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(
reader,
(training_list[index] for index in range(indices[0], indices[1])))
(training_list[index] for index in range(indices[0], indices[1])),
allow_missing_files=allow_missing_files)
# Performs the E-step
trainer = algorithm.kmeans_trainer
......@@ -162,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle
def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
def gmm_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False):
"""Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
This might require a lot of memory."""
fs = FileSelector.instance()
......@@ -178,7 +179,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
# read the features
reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(reader, training_list)
data = utils.vstack_features(reader, training_list, allow_missing_files=allow_missing_files)
# get means and variances of kmeans result
kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file))
......@@ -199,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
logger.info("UBM Training: Wrote GMM file '%s'", output_file)
def gmm_estep(algorithm, extractor, iteration, indices, force=False):
def gmm_estep(algorithm, extractor, iteration, indices, force=False, allow_missing_files = False):
"""Performs a single E-step of the GMM training (parallel)."""
if indices[0] >= indices[1]:
return
......@@ -221,7 +222,8 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False):
reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(
reader,
(training_list[index] for index in range(indices[0], indices[1])))
(training_list[index] for index in range(indices[0], indices[1]))
, allow_missing_files=allow_missing_files)
trainer = algorithm.ubm_trainer
trainer.initialize(gmm_machine, None)
......@@ -294,10 +296,9 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=
shutil.rmtree(old_dir)
def gmm_project(algorithm, extractor, indices, force=False):
def gmm_project(algorithm, extractor, indices, force=False, allow_missing_files = False):
"""Performs GMM projection"""
fs = FileSelector.instance()
algorithm.load_ubm(fs.ubm_file)
feature_files = fs.training_list('extracted', 'train_projector')
......@@ -311,10 +312,12 @@ def gmm_project(algorithm, extractor, indices, force=False):
projected_file = projected_files[i]
if not utils.check_file(projected_file, force):
if len(utils.filter_missing_files([feature_file], split_by_client=False, allow_missing_files=allow_missing_files)) > 0:
# load feature
feature = read_feature(extractor, feature_file)
feature = read_feature(extractor, feature_file, allow_missing_files=allow_missing_files)
# project feature
projected = algorithm.project_ubm(feature)
# write it
bob.io.base.create_directories_safe(os.path.dirname(projected_file))
bob.bio.base.save(projected, projected_file)
......@@ -7,7 +7,8 @@ import os
from bob.bio.base.tools.FileSelector import FileSelector
from bob.bio.base import utils, tools
def train_isv(algorithm, force=False):
def train_isv(algorithm, force=False, allow_missing_files=False):
"""Finally, the UBM is used to train the ISV projector/enroller."""
fs = FileSelector.instance()
......@@ -19,6 +20,7 @@ def train_isv(algorithm, force=False):
# read training data
training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client = True)
training_list = utils.filter_missing_files(training_list, split_by_client=True, allow_missing_files=allow_missing_files)
train_gmm_stats = [[algorithm.read_gmm_stats(filename) for filename in client_files] for client_files in training_list]
# perform ISV training
......
......@@ -9,8 +9,7 @@ from bob.bio.base.tools.FileSelector import FileSelector
from bob.bio.base import utils, tools
def ivector_estep(algorithm, iteration, indices, force=False):
def ivector_estep(algorithm, iteration, indices, force=False, allow_missing_files = False):
"""Performs a single E-step of the IVector algorithm (parallel)"""
fs = FileSelector.instance()
stats_file = fs.ivector_stats_file(iteration, indices[0], indices[1])
......@@ -38,7 +37,9 @@ def ivector_estep(algorithm, iteration, indices, force=False):
# Load data
training_list = fs.training_list('projected_gmm', 'train_projector')
data = [algorithm.read_gmm_stats(training_list[i]) for i in range(indices[0], indices[1])]
training_list = [training_list[i] for i in range(indices[0], indices[1])]
training_list = utils.filter_missing_files(training_list, split_by_client=False, allow_missing_files=allow_missing_files)
data = [algorithm.read_gmm_stats(f) for f in training_list]
# Perform the E-step
trainer.e_step(tv, data)
......@@ -134,7 +135,7 @@ def ivector_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cl
shutil.rmtree(old_dir)
def ivector_project(algorithm, indices, force=False):
def ivector_project(algorithm, indices, force=False, allow_missing_files=False):
"""Performs IVector projection"""
# read UBM and TV into the IVector class
fs = FileSelector.instance()
......@@ -149,7 +150,9 @@ def ivector_project(algorithm, indices, force=False):
for i in range(indices[0], indices[1]):
gmm_stats_file = gmm_stats_files[i]
ivector_file = ivector_files[i]
if not utils.check_file(ivector_file, force):
if len(utils.filter_missing_files([gmm_stats_file], split_by_client=False, allow_missing_files=allow_missing_files)) > 0:
# load feature
feature = algorithm.read_gmm_stats(gmm_stats_file)
# project feature
......@@ -159,7 +162,7 @@ def ivector_project(algorithm, indices, force=False):
bob.bio.base.save(projected, ivector_file)
def train_whitener(algorithm, force=False):
def train_whitener(algorithm, force=False, allow_missing_files=False):
"""Train the feature projector with the extracted features of the world group."""
fs = FileSelector.instance()
......@@ -167,14 +170,16 @@ def train_whitener(algorithm, force=False):
logger.info("- Whitening projector '%s' already exists.", fs.whitener_file)
else:
train_files = fs.training_list('projected_ivector', 'train_projector')
train_files = utils.filter_missing_files(train_files, split_by_client=False, allow_missing_files=allow_missing_files)
train_features = [bob.bio.base.load(f) for f in train_files]
# perform training
algorithm.train_whitener(train_features)
bob.io.base.create_directories_safe(os.path.dirname(fs.whitener_file))
bob.bio.base.save(algorithm.whitener, fs.whitener_file)
def whitening_project(algorithm, indices, force=False):
def