Skip to content
Snippets Groups Projects
Commit df13718f authored by akomaty@idiap.ch
Browse files

solved some conflicts

parent 5d053949
Branches diarization
No related tags found
1 merge request: !7 Diarization
Pipeline #
...@@ -169,7 +169,9 @@ def test_gmm_segment(): ...@@ -169,7 +169,9 @@ def test_gmm_segment():
assert isinstance(projected, list) assert isinstance(projected, list)
projected_ref = pkg_resources.resource_filename('bob.bio.gmm.test', 'data/gmmsegment_projected.hdf5') projected_ref = pkg_resources.resource_filename('bob.bio.gmm.test', 'data/gmmsegment_projected.hdf5')
import ipdb; ipdb.set_trace()
gmm2.write_feature(projected, temp_file)
gmm1.write_feature(projected, temp_file) gmm1.write_feature(projected, temp_file)
hdf5fileref = bob.io.base.HDF5File(projected_ref, 'r') hdf5fileref = bob.io.base.HDF5File(projected_ref, 'r')
......
...@@ -3,6 +3,7 @@ import bob.learn.em ...@@ -3,6 +3,7 @@ import bob.learn.em
import shutil import shutil
import numpy import numpy
import os import os
import functools
import logging import logging
logger = logging.getLogger("bob.bio.gmm") logger = logging.getLogger("bob.bio.gmm")
...@@ -12,7 +13,7 @@ from bob.bio.base import utils, tools ...@@ -12,7 +13,7 @@ from bob.bio.base import utils, tools
from .utils import read_feature from .utils import read_feature
from bob.bio.gmm.algorithm import GMMSegment from bob.bio.gmm.algorithm import GMMSegment
def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): def kmeans_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False):
"""Initializes the K-Means training (non-parallel).""" """Initializes the K-Means training (non-parallel)."""
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -24,7 +25,9 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): ...@@ -24,7 +25,9 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
# read data # read data
logger.info("UBM training: initializing kmeans") logger.info("UBM training: initializing kmeans")
training_list = utils.selected_elements(fs.training_list('extracted', 'train_projector'), limit_data) training_list = utils.selected_elements(fs.training_list('extracted', 'train_projector'), limit_data)
data = numpy.vstack([read_feature(extractor, feature_file) for feature_file in training_list]) # read the features
reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(reader, training_list, allow_missing_files=allow_missing_files)
# Perform KMeans initialization # Perform KMeans initialization
kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1]) kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1])
...@@ -35,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): ...@@ -35,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
logger.info("UBM training: saved initial KMeans machine to '%s'", output_file) logger.info("UBM training: saved initial KMeans machine to '%s'", output_file)
def kmeans_estep(algorithm, extractor, iteration, indices, force=False): def kmeans_estep(algorithm, extractor, iteration, indices, force=False, allow_missing_files = False):
"""Performs a single E-step of the K-Means algorithm (parallel)""" """Performs a single E-step of the K-Means algorithm (parallel)"""
if indices[0] >= indices[1]: if indices[0] >= indices[1]:
return return
...@@ -55,8 +58,12 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False): ...@@ -55,8 +58,12 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
logger.info("UBM training: KMeans E-Step round %d from range(%d, %d)", iteration, *indices) logger.info("UBM training: KMeans E-Step round %d from range(%d, %d)", iteration, *indices)
# read data # read the features
data = numpy.vstack([read_feature(extractor, training_list[index]) for index in range(indices[0], indices[1])]) reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(
reader,
(training_list[index] for index in range(indices[0], indices[1])),
allow_missing_files=allow_missing_files)
# Performs the E-step # Performs the E-step
trainer = algorithm.kmeans_trainer trainer = algorithm.kmeans_trainer
...@@ -95,7 +102,7 @@ def _accumulate(filenames): ...@@ -95,7 +102,7 @@ def _accumulate(filenames):
zeroeth += zeroeth_ zeroeth += zeroeth_
first += first_ first += first_
nsamples += nsamples_ nsamples += nsamples_
dist += dist_ dist += dist_
return (zeroeth, first, nsamples, dist) return (zeroeth, first, nsamples, dist)
def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False): def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
...@@ -156,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle ...@@ -156,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle
def gmm_initialize(algorithm, extractor, limit_data = None, force = False): def gmm_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False):
"""Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel). """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
This might require a lot of memory.""" This might require a lot of memory."""
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -168,9 +175,11 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False): ...@@ -168,9 +175,11 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
else: else:
logger.info("UBM Training: Initializing GMM") logger.info("UBM Training: Initializing GMM")
# read features
training_list = utils.selected_elements(fs.training_list('extracted', 'train_projector'), limit_data) training_list = utils.selected_elements(fs.training_list('extracted', 'train_projector'), limit_data)
data = numpy.vstack([read_feature(extractor, feature_file) for feature_file in training_list])
# read the features
reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(reader, training_list, allow_missing_files=allow_missing_files)
# get means and variances of kmeans result # get means and variances of kmeans result
kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file)) kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file))
...@@ -191,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False): ...@@ -191,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
logger.info("UBM Training: Wrote GMM file '%s'", output_file) logger.info("UBM Training: Wrote GMM file '%s'", output_file)
def gmm_estep(algorithm, extractor, iteration, indices, force=False): def gmm_estep(algorithm, extractor, iteration, indices, force=False, allow_missing_files = False):
"""Performs a single E-step of the GMM training (parallel).""" """Performs a single E-step of the GMM training (parallel)."""
if indices[0] >= indices[1]: if indices[0] >= indices[1]:
return return
...@@ -209,8 +218,13 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False): ...@@ -209,8 +218,13 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False):
logger.info("UBM training: GMM E-Step from range(%d, %d)", *indices) logger.info("UBM training: GMM E-Step from range(%d, %d)", *indices)
# read data # read the features
data = numpy.vstack([read_feature(extractor, training_list[index]) for index in range(indices[0], indices[1])]) reader = functools.partial(read_feature, extractor)
data = utils.vstack_features(
reader,
(training_list[index] for index in range(indices[0], indices[1]))
, allow_missing_files=allow_missing_files)
trainer = algorithm.ubm_trainer trainer = algorithm.ubm_trainer
trainer.initialize(gmm_machine, None) trainer.initialize(gmm_machine, None)
...@@ -282,7 +296,7 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean= ...@@ -282,7 +296,7 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=
shutil.rmtree(old_dir) shutil.rmtree(old_dir)
def gmm_project(algorithm, extractor, indices, force=False): def gmm_project(algorithm, extractor, indices, force=False, allow_missing_files = False):
"""Performs GMM projection""" """Performs GMM projection"""
fs = FileSelector.instance() fs = FileSelector.instance()
...@@ -299,11 +313,11 @@ def gmm_project(algorithm, extractor, indices, force=False): ...@@ -299,11 +313,11 @@ def gmm_project(algorithm, extractor, indices, force=False):
projected_file = projected_files[i] projected_file = projected_files[i]
if not utils.check_file(projected_file, force): if not utils.check_file(projected_file, force):
# load feature if len(utils.filter_missing_files([feature_file], split_by_client=False, allow_missing_files=allow_missing_files)) > 0:
feature = read_feature(extractor, feature_file) # load feature
# project feature feature = read_feature(extractor, feature_file)
projected = algorithm.project_ubm(feature) # project feature
# write it projected = algorithm.project_ubm(feature)
bob.io.base.create_directories_safe(os.path.dirname(projected_file)) # write it
bob.io.base.create_directories_safe(os.path.dirname(projected_file))
algorithm.write_feature(projected, projected_file) algorithm.write_feature(projected, projected_file)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment