Skip to content
Snippets Groups Projects
Commit 19822e24 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Propagated --allow-missing-files to the UBM training

propagating allow missing files

propagating allow missing files
parent e8d4b0b2
No related branches found
No related tags found
No related merge requests found
Pipeline #
...@@ -158,6 +158,7 @@ def execute(args): ...@@ -158,6 +158,7 @@ def execute(args):
algorithm, algorithm,
args.extractor, args.extractor,
args.limit_training_data, args.limit_training_data,
allow_missing_files = args.allow_missing_files,
force = args.force) force = args.force)
# train the feature projector # train the feature projector
...@@ -166,6 +167,7 @@ def execute(args): ...@@ -166,6 +167,7 @@ def execute(args):
algorithm, algorithm,
args.extractor, args.extractor,
args.iteration, args.iteration,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force) force = args.force)
...@@ -183,6 +185,7 @@ def execute(args): ...@@ -183,6 +185,7 @@ def execute(args):
algorithm, algorithm,
args.extractor, args.extractor,
args.limit_training_data, args.limit_training_data,
allow_missing_files = args.allow_missing_files,
force = args.force) force = args.force)
# train the feature projector # train the feature projector
...@@ -191,6 +194,7 @@ def execute(args): ...@@ -191,6 +194,7 @@ def execute(args):
algorithm, algorithm,
args.extractor, args.extractor,
args.iteration, args.iteration,
allow_missing_files = args.allow_missing_files,
indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
force = args.force) force = args.force)
......
import logging
import os
logger = logging.getLogger("bob.bio.gmm")
def check_allow_missing_files(allow_missing_files, filename):
"""
Just a snniped that checks the allow missing files flag
"""
if not os.path.exists(filename):
if allow_missing_files:
logger.debug("... Cannot find the file %s; skipping", filename)
else:
raise RuntimeError("Cannot find the file '%s' " % filename)
from .utils import * from .utils import *
from .command_line import * from .command_line import *
from .gmm import * from .gmm import *
from .isv import * from .isv import *
from .ivector import * from .ivector import *
# gets sphinx autodoc done right - don't remove it # gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')] __all__ = [_ for _ in dir() if not _.startswith('_')]
...@@ -12,7 +12,7 @@ from bob.bio.base import utils, tools ...@@ -12,7 +12,7 @@ from bob.bio.base import utils, tools
from .utils import read_feature from .utils import read_feature
def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): def kmeans_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False):
"""Initializes the K-Means training (non-parallel).""" """Initializes the K-Means training (non-parallel)."""
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -27,7 +27,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): ...@@ -27,7 +27,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
# read the features # read the features
reader = functools.partial(read_feature, extractor) reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(reader, training_list) data = utils.vstack_features(reader, training_list, allow_missing_files=allow_missing_files)
# Perform KMeans initialization # Perform KMeans initialization
kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1]) kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1])
...@@ -38,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): ...@@ -38,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
logger.info("UBM training: saved initial KMeans machine to '%s'", output_file) logger.info("UBM training: saved initial KMeans machine to '%s'", output_file)
def kmeans_estep(algorithm, extractor, iteration, indices, force=False): def kmeans_estep(algorithm, extractor, iteration, indices, force=False, allow_missing_files = False):
"""Performs a single E-step of the K-Means algorithm (parallel)""" """Performs a single E-step of the K-Means algorithm (parallel)"""
if indices[0] >= indices[1]: if indices[0] >= indices[1]:
return return
...@@ -62,7 +62,8 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False): ...@@ -62,7 +62,8 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
reader = functools.partial(read_feature, extractor) reader = functools.partial(read_feature, extractor)
data = utils.vstack_features( data = utils.vstack_features(
reader, reader,
(training_list[index] for index in range(indices[0], indices[1]))) (training_list[index] for index in range(indices[0], indices[1])),
allow_missing_files=allow_missing_files)
# Performs the E-step # Performs the E-step
trainer = algorithm.kmeans_trainer trainer = algorithm.kmeans_trainer
...@@ -162,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle ...@@ -162,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle
def gmm_initialize(algorithm, extractor, limit_data = None, force = False): def gmm_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False):
"""Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel). """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
This might require a lot of memory.""" This might require a lot of memory."""
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -178,7 +179,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False): ...@@ -178,7 +179,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
# read the features # read the features
reader = functools.partial(read_feature, extractor) reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(reader, training_list) data = utils.vstack_features(reader, training_list, allow_missing_files=allow_missing_files)
# get means and variances of kmeans result # get means and variances of kmeans result
kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file)) kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file))
...@@ -199,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False): ...@@ -199,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
logger.info("UBM Training: Wrote GMM file '%s'", output_file) logger.info("UBM Training: Wrote GMM file '%s'", output_file)
def gmm_estep(algorithm, extractor, iteration, indices, force=False): def gmm_estep(algorithm, extractor, iteration, indices, force=False, allow_missing_files = False):
"""Performs a single E-step of the GMM training (parallel).""" """Performs a single E-step of the GMM training (parallel)."""
if indices[0] >= indices[1]: if indices[0] >= indices[1]:
return return
...@@ -221,7 +222,8 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False): ...@@ -221,7 +222,8 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False):
reader = functools.partial(read_feature, extractor) reader = functools.partial(read_feature, extractor)
data = utils.vstack_features( data = utils.vstack_features(
reader, reader,
(training_list[index] for index in range(indices[0], indices[1]))) (training_list[index] for index in range(indices[0], indices[1]))
, allow_missing_files=allow_missing_files)
trainer = algorithm.ubm_trainer trainer = algorithm.ubm_trainer
trainer.initialize(gmm_machine, None) trainer.initialize(gmm_machine, None)
...@@ -294,7 +296,7 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean= ...@@ -294,7 +296,7 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=
shutil.rmtree(old_dir) shutil.rmtree(old_dir)
def gmm_project(algorithm, extractor, indices, force=False): def gmm_project(algorithm, extractor, indices, force=False, allow_missing_files = False):
"""Performs GMM projection""" """Performs GMM projection"""
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -311,8 +313,9 @@ def gmm_project(algorithm, extractor, indices, force=False): ...@@ -311,8 +313,9 @@ def gmm_project(algorithm, extractor, indices, force=False):
projected_file = projected_files[i] projected_file = projected_files[i]
if not utils.check_file(projected_file, force): if not utils.check_file(projected_file, force):
# load feature # load feature
feature = read_feature(extractor, feature_file) feature = read_feature(extractor, feature_file, allow_missing_files=allow_missing_files)
# project feature # project feature
projected = algorithm.project_ubm(feature) projected = algorithm.project_ubm(feature)
# write it # write it
......
...@@ -6,8 +6,9 @@ import os ...@@ -6,8 +6,9 @@ import os
from bob.bio.base.tools.FileSelector import FileSelector from bob.bio.base.tools.FileSelector import FileSelector
from bob.bio.base import utils, tools from bob.bio.base import utils, tools
from bob.bio.gmm.tools import check_allow_missing_files
def train_isv(algorithm, force=False): def train_isv(algorithm, force=False, allow_missing_files=False):
"""Finally, the UBM is used to train the ISV projector/enroller.""" """Finally, the UBM is used to train the ISV projector/enroller."""
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -19,7 +20,15 @@ def train_isv(algorithm, force=False): ...@@ -19,7 +20,15 @@ def train_isv(algorithm, force=False):
# read training data # read training data
training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client = True) training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client = True)
train_gmm_stats = [[algorithm.read_gmm_stats(filename) for filename in client_files] for client_files in training_list]
train_gmm_stats = []
for client_files in training_list:
client_stats = []
for filename in client_files:
check_allow_missing_files(allow_missing_files, filename)
client_stats.append(algorithm.read_gmm_stats(filename))
train_gmm_stats.append(client_stats)
#train_gmm_stats = [[algorithm.read_gmm_stats(filename) for filename in client_files] for client_files in training_list]
# perform ISV training # perform ISV training
logger.info("ISV training: training ISV with %d clients", len(train_gmm_stats)) logger.info("ISV training: training ISV with %d clients", len(train_gmm_stats))
......
...@@ -8,9 +8,9 @@ import shutil ...@@ -8,9 +8,9 @@ import shutil
from bob.bio.base.tools.FileSelector import FileSelector from bob.bio.base.tools.FileSelector import FileSelector
from bob.bio.base import utils, tools from bob.bio.base import utils, tools
from . import check_allow_missing_files
def ivector_estep(algorithm, iteration, indices, force=False, allow_missing_files = False):
def ivector_estep(algorithm, iteration, indices, force=False):
"""Performs a single E-step of the IVector algorithm (parallel)""" """Performs a single E-step of the IVector algorithm (parallel)"""
fs = FileSelector.instance() fs = FileSelector.instance()
stats_file = fs.ivector_stats_file(iteration, indices[0], indices[1]) stats_file = fs.ivector_stats_file(iteration, indices[0], indices[1])
...@@ -38,7 +38,13 @@ def ivector_estep(algorithm, iteration, indices, force=False): ...@@ -38,7 +38,13 @@ def ivector_estep(algorithm, iteration, indices, force=False):
# Load data # Load data
training_list = fs.training_list('projected_gmm', 'train_projector') training_list = fs.training_list('projected_gmm', 'train_projector')
data = [algorithm.read_gmm_stats(training_list[i]) for i in range(indices[0], indices[1])]
data = []
for i in range(indices[0], indices[1]):
filename = training_list[i]
check_allow_missing_files(allow_missing_files, filename)
data.append(algorithm.read_gmm_stats(filename))
#data = [algorithm.read_gmm_stats(training_list[i]) for i in range(indices[0], indices[1])]
# Perform the E-step # Perform the E-step
trainer.e_step(tv, data) trainer.e_step(tv, data)
...@@ -134,7 +140,7 @@ def ivector_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cl ...@@ -134,7 +140,7 @@ def ivector_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cl
shutil.rmtree(old_dir) shutil.rmtree(old_dir)
def ivector_project(algorithm, indices, force=False): def ivector_project(algorithm, indices, force=False, allow_missing_files=False):
"""Performs IVector projection""" """Performs IVector projection"""
# read UBM and TV into the IVector class # read UBM and TV into the IVector class
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -150,6 +156,9 @@ def ivector_project(algorithm, indices, force=False): ...@@ -150,6 +156,9 @@ def ivector_project(algorithm, indices, force=False):
gmm_stats_file = gmm_stats_files[i] gmm_stats_file = gmm_stats_files[i]
ivector_file = ivector_files[i] ivector_file = ivector_files[i]
if not utils.check_file(ivector_file, force): if not utils.check_file(ivector_file, force):
check_allow_missing_files(allow_missing_files, gmm_stats_file)
# load feature # load feature
feature = algorithm.read_gmm_stats(gmm_stats_file) feature = algorithm.read_gmm_stats(gmm_stats_file)
# project feature # project feature
...@@ -159,7 +168,7 @@ def ivector_project(algorithm, indices, force=False): ...@@ -159,7 +168,7 @@ def ivector_project(algorithm, indices, force=False):
bob.bio.base.save(projected, ivector_file) bob.bio.base.save(projected, ivector_file)
def train_whitener(algorithm, force=False): def train_whitener(algorithm, force=False, allow_missing_files=False):
"""Train the feature projector with the extracted features of the world group.""" """Train the feature projector with the extracted features of the world group."""
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -167,7 +176,14 @@ def train_whitener(algorithm, force=False): ...@@ -167,7 +176,14 @@ def train_whitener(algorithm, force=False):
logger.info("- Whitening projector '%s' already exists.", fs.whitener_file) logger.info("- Whitening projector '%s' already exists.", fs.whitener_file)
else: else:
train_files = fs.training_list('projected_ivector', 'train_projector') train_files = fs.training_list('projected_ivector', 'train_projector')
train_features = [bob.bio.base.load(f) for f in train_files]
train_features = []
for f in train_files:
check_allow_missing_files(allow_missing_files, f)
train_features.append(bob.bio.base.load(f))
#train_features = [bob.bio.base.load(f) for f in train_files]
# perform training # perform training
algorithm.train_whitener(train_features) algorithm.train_whitener(train_features)
bob.io.base.create_directories_safe(os.path.dirname(fs.whitener_file)) bob.io.base.create_directories_safe(os.path.dirname(fs.whitener_file))
...@@ -188,6 +204,8 @@ def whitening_project(algorithm, indices, force=False): ...@@ -188,6 +204,8 @@ def whitening_project(algorithm, indices, force=False):
ivector_file = ivector_files[i] ivector_file = ivector_files[i]
whitened_file = whitened_files[i] whitened_file = whitened_files[i]
if not utils.check_file(whitened_file, force): if not utils.check_file(whitened_file, force):
check_allow_missing_files(allow_missing_files, ivector_file)
# load feature # load feature
ivector = algorithm.read_feature(ivector_file) ivector = algorithm.read_feature(ivector_file)
# project feature # project feature
...@@ -204,7 +222,16 @@ def train_lda(algorithm, force=False): ...@@ -204,7 +222,16 @@ def train_lda(algorithm, force=False):
logger.info("- LDA projector '%s' already exists.", fs.lda_file) logger.info("- LDA projector '%s' already exists.", fs.lda_file)
else: else:
train_files = fs.training_list('whitened', 'train_projector', arrange_by_client = True) train_files = fs.training_list('whitened', 'train_projector', arrange_by_client = True)
train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files] #train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
train_features = []
for client_files in train_files:
client_features = []
for filename in client_files:
check_allow_missing_files(allow_missing_files, filename)
client_features.append(bob.bio.base.load(filename))
train_features.append(client_features)
# perform training # perform training
algorithm.train_lda(train_features) algorithm.train_lda(train_features)
bob.io.base.create_directories_safe(os.path.dirname(fs.lda_file)) bob.io.base.create_directories_safe(os.path.dirname(fs.lda_file))
...@@ -224,6 +251,8 @@ def lda_project(algorithm, indices, force=False): ...@@ -224,6 +251,8 @@ def lda_project(algorithm, indices, force=False):
ivector_file = whitened_files[i] ivector_file = whitened_files[i]
lda_projected_file = lda_projected_files[i] lda_projected_file = lda_projected_files[i]
if not utils.check_file(lda_projected_file, force): if not utils.check_file(lda_projected_file, force):
check_allow_missing_files(allow_missing_files, ivector_file)
# load feature # load feature
ivector = algorithm.read_feature(ivector_file) ivector = algorithm.read_feature(ivector_file)
# project feature # project feature
...@@ -244,7 +273,16 @@ def train_wccn(algorithm, force=False): ...@@ -244,7 +273,16 @@ def train_wccn(algorithm, force=False):
else: else:
input_label = 'whitened' input_label = 'whitened'
train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True) train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True)
train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files] #train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
train_features = []
for client_files in train_files:
client_features = []
for filename in client_files:
check_allow_missing_files(allow_missing_files, filename)
client_features.append(bob.bio.base.load(filename))
train_features.append(client_features)
# perform training # perform training
algorithm.train_wccn(train_features) algorithm.train_wccn(train_features)
bob.io.base.create_directories_safe(os.path.dirname(fs.wccn_file)) bob.io.base.create_directories_safe(os.path.dirname(fs.wccn_file))
...@@ -267,7 +305,9 @@ def wccn_project(algorithm, indices, force=False): ...@@ -267,7 +305,9 @@ def wccn_project(algorithm, indices, force=False):
for i in range(indices[0], indices[1]): for i in range(indices[0], indices[1]):
ivector_file = input_files[i] ivector_file = input_files[i]
wccn_projected_file = wccn_projected_files[i] wccn_projected_file = wccn_projected_files[i]
if not utils.check_file(wccn_projected_file, force): if not utils.check_file(wccn_projected_file, force):
check_allow_missing_files(allow_missing_files, ivector_file)
# load feature # load feature
ivector = algorithm.read_feature(ivector_file) ivector = algorithm.read_feature(ivector_file)
# project feature # project feature
...@@ -290,7 +330,16 @@ def train_plda(algorithm, force=False): ...@@ -290,7 +330,16 @@ def train_plda(algorithm, force=False):
else: else:
input_label = 'whitened' input_label = 'whitened'
train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True) train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True)
train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files] #train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
train_features = []
for client_files in train_files:
client_features = []
for filename in client_files:
check_allow_missing_files(allow_missing_files, filename)
client_features.append(bob.bio.base.load(filename))
train_features.append(client_features)
# perform training # perform training
algorithm.train_plda(train_features) algorithm.train_plda(train_features)
bob.io.base.create_directories_safe(os.path.dirname(fs.plda_file)) bob.io.base.create_directories_safe(os.path.dirname(fs.plda_file))
......
import bob.bio.base import bob.bio.base
import numpy import numpy
import os
def add_jobs(args, submitter, local_job_adder): def add_jobs(args, submitter, local_job_adder):
"""Adds all (desired) jobs of the tool chain to the grid, or to the local list to be executed.""" """Adds all (desired) jobs of the tool chain to the grid, or to the local list to be executed."""
...@@ -63,7 +64,14 @@ def base(algorithm): ...@@ -63,7 +64,14 @@ def base(algorithm):
"""Returns the base algorithm, if it is a video extension, otherwise returns the algorithm itself""" """Returns the base algorithm, if it is a video extension, otherwise returns the algorithm itself"""
return algorithm.algorithm if is_video_extension(algorithm) else algorithm return algorithm.algorithm if is_video_extension(algorithm) else algorithm
def read_feature(extractor, feature_file): def read_feature(extractor, feature_file, allow_missing_files = False):
if not os.path.exists(feature_file):
if allow_missing_files:
logger.debug("... Cannot find preprocessed data file %s; skipping", feature_file)
else:
raise RuntimeError("Cannot find file '%s' " % feature_file)
feature = extractor.read_feature(feature_file) feature = extractor.read_feature(feature_file)
try: try:
import bob.bio.video import bob.bio.video
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment