Commit dd4bef0b authored by Milos CERNAK's avatar Milos CERNAK

Update the guide and returning UBM model formats

parent 59c11be9
Pipeline #9392 failed with stages
in 9 minutes and 38 seconds
......@@ -57,7 +57,7 @@ def ubm_train(feats, ubmname, num_threads=4, num_frames=500000,
Returns
-------
str
A path to the the trained ubm model.
A text formatted trained Kaldi global DiagGMM model.
"""
......@@ -66,6 +66,7 @@ def ubm_train(feats, ubmname, num_threads=4, num_frames=500000,
binary3 = 'gmm-gselect'
binary4 = 'gmm-global-acc-stats'
binary5 = 'gmm-global-est'
binary6 = 'gmm-global-copy'
# 1. Initialize a single diagonal GMM
cmd1 = [binary1] # gmm-global-init-from-feats
......@@ -175,16 +176,34 @@ def ubm_train(feats, ubmname, num_threads=4, num_frames=500000,
os.unlink(inModel)
inModel = estfile.name
os.unlink(gselfile.name)
# 6. Copy a single diagonal GMM as text string (for the BEAT platform)
ret = ""
with tempfile.NamedTemporaryFile(suffix='.txt') as txtfile, \
tempfile.NamedTemporaryFile(suffix='.log') as logfile:
cmd = [binary6] # gmm-global-copy
cmd += [
'--binary=false',
estfile.name,
txtfile.name,
]
pipe = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=logfile)
pipe.communicate()
with open(logfile.name) as fp:
logtxt = fp.read()
logger.debug("%s", logtxt)
with open(txtfile.name, 'rt') as f:
ubmtxt = f.read()
ret = ubmtxt
shutil.copyfile(estfile.name, ubmname)
shutil.copyfile(estfile.name, ubmname + '.dubm')
os.unlink(estfile.name)
return ubmname + '.dubm'
os.unlink(gselfile.name)
return ret
def ubm_full_train(feats, dubmname, num_gselect=20, num_iters=4,
def ubm_full_train(feats, dubm, fubmfile, num_gselect=20, num_iters=4,
min_gaussian_weight=1.0e-04):
""" Implements Kaldi egs/sre10/v1/train_full_ubm.sh
......@@ -192,10 +211,10 @@ def ubm_full_train(feats, dubmname, num_gselect=20, num_iters=4,
----------
feats : numpy.ndarray
A 2D numpy ndarray object containing MFCCs.
dubmname : str
A path to the UBM model.
dubm : str
A text formatted trained Kaldi global DiagGMM model.
fubmfile : str
A path to the full covariance UBM model.
num_gselect : :obj:`int`, optional
Number of Gaussians to keep per frame.
num_iters : :obj:`int`, optional
......@@ -207,19 +226,22 @@ def ubm_full_train(feats, dubmname, num_gselect=20, num_iters=4,
Returns
-------
str
A path to the the trained full covariance UBM model.
A path to the full covariance UBM model.
"""
binary1 = 'gmm-global-to-fgmm'
binary2 = 'fgmm-global-to-gmm'
# binary2 = 'fgmm-global-to-gmm'
binary3 = 'subsample-feats'
binary4 = 'gmm-gselect'
binary5 = 'fgmm-global-acc-stats'
binary6 = 'fgmm-global-est'
origdubm = dubmname
dubmname += '.dubm'
# Convert UBM string to a file
with tempfile.NamedTemporaryFile(
delete=False, suffix='.dump') as dubmfile:
with open(dubmfile.name, 'wt') as fp:
fp.write(dubm)
# 1. Init (diagonal GMM to full-cov. GMM)
# gmm-global-to-fgmm $srcdir/final.dubm $dir/0.ubm || exit 1;
......@@ -229,7 +251,7 @@ def ubm_full_train(feats, dubmname, num_gselect=20, num_iters=4,
initfile, tempfile.NamedTemporaryFile(suffix='.log') as logfile:
inModel = initfile.name
cmd1 += [
dubmname,
dubmfile.name,
inModel,
]
pipe1 = Popen(cmd1, stdin=PIPE, stdout=PIPE, stderr=logfile)
......@@ -243,20 +265,10 @@ def ubm_full_train(feats, dubmname, num_gselect=20, num_iters=4,
# gmm-gselect --n=$num_gselect "fgmm-global-to-gmm $dir/0.ubm - \
# |" "$feats" \
# "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1;
cmd2 = [binary2] # fgmm-global-to-gmm
with tempfile.NamedTemporaryFile(suffix='.dubm') as dubmfile, \
tempfile.NamedTemporaryFile(suffix='.ark') as arkfile, \
# cmd2 = [binary2] # fgmm-global-to-gmm
# with tempfile.NamedTemporaryFile(suffix='.dubm') as dubmfile, \
with tempfile.NamedTemporaryFile(suffix='.ark') as arkfile, \
tempfile.NamedTemporaryFile(suffix='.gz') as gselfile:
cmd2 += [
inModel,
dubmfile.name,
]
with tempfile.NamedTemporaryFile(suffix='.log') as logfile:
pipe2 = Popen(cmd2, stdin=PIPE, stdout=PIPE, stderr=logfile)
pipe2.communicate()
with open(logfile.name) as fp:
logtxt = fp.read()
logger.debug("%s", logtxt)
# subsample-feats --n=$subsample ark:- ark:- |"
cmd = [binary3] # subsample-feats
cmd += [
......@@ -337,36 +349,43 @@ def ubm_full_train(feats, dubmname, num_gselect=20, num_iters=4,
os.unlink(inModel)
inModel = estfile.name
shutil.copyfile(estfile.name, origdubm + '.fubm')
shutil.copyfile(estfile.name, fubmfile)
os.unlink(estfile.name)
os.unlink(dubmfile.name)
return origdubm + '.fubm'
return fubmfile # ToDo: convert to a text format
def ubm_enroll(feats, ubm_file):
def ubm_enroll(feats, ubm):
"""Performs MAP adaptation of the GMM-UBM model.
Parameters
----------
feats : numpy.ndarray
A 2D numpy ndarray object containing MFCCs.
ubm_file : str
A path to the Kaldi global GMM.
ubm : str
A text formatted Kaldi global DiagGMM.
Returns
-------
str
A path to the enrolled GMM.
A text formatted Kaldi enrolled DiagGMM.
"""
binary1 = 'gmm-global-acc-stats'
binary2 = 'global-gmm-adapt-map'
binary3 = 'gmm-global-copy'
with tempfile.NamedTemporaryFile(
delete=False, suffix='.dump') as ubmfile:
with open(ubmfile.name, 'wt') as fp:
fp.write(ubm)
# 1. Accumulate stats for training a diagonal-covariance GMM.
cmd1 = [binary1] # gmm-global-acc-stats
cmd1 += [
ubm_file,
ubmfile.name,
'ark:-',
'-',
]
......@@ -375,7 +394,7 @@ def ubm_enroll(feats, ubm_file):
estfile, tempfile.NamedTemporaryFile(suffix='.log') as logfile:
cmd2 += [
'--update-flags=m',
ubm_file,
ubmfile.name,
'-',
estfile.name,
]
......@@ -392,9 +411,33 @@ def ubm_enroll(feats, ubm_file):
logtxt = fp.read()
logger.debug("%s", logtxt)
return estfile.name
def gmm_score(feats, gmm_file, ubm_file):
# 3. Copy adapted diagonal GMM as text string (for the BEAT platform)
ret = ""
with tempfile.NamedTemporaryFile(suffix='.txt') as txtfile, \
tempfile.NamedTemporaryFile(suffix='.log') as logfile:
cmd = [binary3] # gmm-global-copy
cmd += [
'--binary=false',
estfile.name,
txtfile.name,
]
pipe = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=logfile)
pipe.communicate()
with open(logfile.name) as fp:
logtxt = fp.read()
logger.debug("%s", logtxt)
with open(txtfile.name, 'rt') as f:
ubmtxt = f.read()
ret = ubmtxt
os.unlink(ubmfile.name)
os.unlink(estfile.name)
return ret
def gmm_score(feats, spkubm, ubm):
"""Print out per-frame log-likelihoods for input utterance.
Parameters
......@@ -402,10 +445,10 @@ def gmm_score(feats, gmm_file, ubm_file):
feats : numpy.ndarray
A 2D numpy ndarray object containing MFCCs.
gmm_file : str
A path to Kaldi adapted global GMM.
ubm_file : str
A path to Kaldi global GMM.
spkubm : str
A text formatted Kaldi adapted global DiagGMM.
ubm : str
A text formatted Kaldi global DiagGMM.
Returns
......@@ -416,10 +459,22 @@ def gmm_score(feats, gmm_file, ubm_file):
"""
binary1 = 'gmm-global-get-frame-likes'
# Convert UBM string to a file
with tempfile.NamedTemporaryFile(
delete=False, suffix='.dubm') as ubmfile:
with open(ubmfile.name, 'wt') as fp:
fp.write(ubm)
# Convert speaker UBM string to a file
with tempfile.NamedTemporaryFile(
delete=False, suffix='.dubm') as spkubmfile:
with open(spkubmfile.name, 'wt') as fp:
fp.write(spkubm)
models = [
gmm_file,
ubm_file
spkubmfile.name,
ubmfile.name
]
ret = [0, 0]
# import ipdb; ipdb.set_trace()
......@@ -449,6 +504,8 @@ def gmm_score(feats, gmm_file, ubm_file):
logtxt = fp.read()
logger.debug("%s", logtxt)
os.unlink(ubmfile.name)
os.unlink(spkubmfile.name)
return ret[0] - ret[1]
# def gmm_score_fast(feats, gmm_file, ubm_file):
......
......@@ -16,18 +16,20 @@ import logging
logger = logging.getLogger(__name__)
def ivector_train(feats, projector_file, num_gselect=20, ivector_dim=600,
use_weights=False, num_iters=5, min_post=0.025,
num_samples_for_weights=3, posterior_scale=1.0):
def ivector_train(feats, fubm, ivector_extractor, num_gselect=20,
ivector_dim=600, use_weights=False, num_iters=5,
min_post=0.025, num_samples_for_weights=3,
posterior_scale=1.0):
"""Implements Kaldi egs/sre10/v1/train_ivector_extractor.sh
Parameters
----------
feats : numpy.ndarray
A 2D numpy ndarray object containing MFCCs.
projector_file : str
A path to global GMM file
fubm : str
A path to the full-covariance UBM file
ivector_extractor : str
A path to the ivector extractor
num_gselect : :obj:`int`, optional
Number of Gaussians to keep per frame.
......@@ -61,8 +63,6 @@ def ivector_train(feats, projector_file, num_gselect=20, ivector_dim=600,
binary6 = 'ivector-extractor-acc-stats'
binary7 = 'ivector-extractor-est'
fgmm_model = projector_file + '.fubm'
# 1. Create Kaldi training data structure
# ToDo: implement Bob's function for that
with tempfile.NamedTemporaryFile(delete=False, suffix='.ark') as arkfile:
......@@ -79,7 +79,7 @@ def ivector_train(feats, projector_file, num_gselect=20, ivector_dim=600,
with tempfile.NamedTemporaryFile(delete=False, suffix='.dubm') as \
dubmfile, tempfile.NamedTemporaryFile(suffix='.log') as logfile:
cmd1 += [
fgmm_model,
fubm,
dubmfile.name,
]
pipe1 = Popen(cmd1, stdin=PIPE, stdout=PIPE, stderr=logfile)
......@@ -94,7 +94,7 @@ def ivector_train(feats, projector_file, num_gselect=20, ivector_dim=600,
cmd2 += [
'--ivector-dim=' + str(ivector_dim),
'--use-weights=' + str(use_weights).lower(),
fgmm_model,
fubm,
iefile.name,
]
pipe2 = Popen(cmd2, stdin=PIPE, stdout=PIPE, stderr=logfile)
......@@ -130,7 +130,7 @@ def ivector_train(feats, projector_file, num_gselect=20, ivector_dim=600,
cmd4 = [binary4] # fgmm-global-gselect-to-post
cmd4 += [
'--min-post=' + str(min_post),
fgmm_model,
fubm,
'ark:' + arkfile.name,
'ark:' + gselfile.name,
'ark:-',
......@@ -201,21 +201,23 @@ def ivector_train(feats, projector_file, num_gselect=20, ivector_dim=600,
os.unlink(inModel)
inModel = estfile.name
shutil.copyfile(inModel, projector_file + '.ie')
shutil.copyfile(inModel, ivector_extractor)
os.unlink(inModel)
return projector_file + '.ie'
return ivector_extractor # ToDo: convert to a string
def ivector_extract(feats, projector_file, num_gselect=20, min_post=0.025,
posterior_scale=1.0):
def ivector_extract(feats, fubm, ivector_extractor, num_gselect=20,
min_post=0.025, posterior_scale=1.0):
"""Implements Kaldi egs/sre10/v1/extract_ivectors.sh
Parameters
----------
feats : numpy.ndarray
A 2D numpy ndarray object containing MFCCs.
projector_file : str
fubm : str
A path to the full-covariance UBM file
ivector_extractor : str
A path to the ivector extractor.
num_gselect : :obj:`int`, optional
Number of Gaussians to keep per frame.
......@@ -241,14 +243,13 @@ def ivector_extract(feats, projector_file, num_gselect=20, min_post=0.025,
# import ipdb; ipdb.set_trace()
# ivector-extract --verbose=2 $srcdir/final.ie "$feats" ark,s,cs:- \
# ark,scp,t:$dir/ivector.JOB.ark,$dir/ivector.JOB.scp || exit 1;
fgmm_model = projector_file + '.fubm'
# Initialize the i-vector extractor using the FGMM input
cmd1 = [binary1] # fgmm-global-to-gmm
with tempfile.NamedTemporaryFile(delete=False, suffix='.dubm') as \
dubmfile, tempfile.NamedTemporaryFile(suffix='.log') as logfile:
cmd1 += [
fgmm_model,
fubm,
dubmfile.name,
]
pipe1 = Popen(cmd1, stdin=PIPE, stdout=PIPE, stderr=logfile)
......@@ -277,7 +278,7 @@ def ivector_extract(feats, projector_file, num_gselect=20, min_post=0.025,
cmd2 = [binary3] # fgmm-global-gselect-to-post
cmd2 += [
'--min-post=' + str(min_post),
fgmm_model,
fubm,
'ark:-',
'ark,s,cs:' + gselfile.name,
'ark:-',
......@@ -302,7 +303,7 @@ def ivector_extract(feats, projector_file, num_gselect=20, min_post=0.025,
cmd4 = [binary5] # ivector-extract
cmd4 += [
projector_file + '.ie',
ivector_extractor,
'ark:-',
'ark,s,cs:' + postfile.name,
'ark:-',
......
......@@ -28,12 +28,14 @@ def test_ubm_train():
dubm = bob.kaldi.ubm_train(array, temp_file, num_gauss=2,
num_gselect=2, num_iters=2)
assert os.path.exists(dubm)
# assert os.path.exists(dubm)
assert dubm.find('DiagGMM')
def test_ubm_full_train():
temp_dubm_file = bob.io.base.test_utils.temporary_filename()
temp_fubm_file = bob.io.base.test_utils.temporary_filename()
sample = pkg_resources.resource_filename(__name__, 'data/sample16k.wav')
data = bob.io.audio.reader(sample)
......@@ -43,7 +45,7 @@ def test_ubm_full_train():
dubm = bob.kaldi.ubm_train(array, temp_dubm_file, num_gauss=2,
num_gselect=2, num_iters=2)
# Train small full GMM
ubm = bob.kaldi.ubm_full_train(array, temp_dubm_file,
ubm = bob.kaldi.ubm_full_train(array, dubm, temp_fubm_file,
num_gselect=2, num_iters=2)
assert os.path.exists(ubm)
......@@ -64,8 +66,8 @@ def test_ubm_enroll():
# Perform MAP adaptation of the GMM
spk_model = bob.kaldi.ubm_enroll(array, dubm)
assert os.path.exists(spk_model)
# assert os.path.exists(spk_model)
assert spk_model.find('DiagGMM')
def test_gmm_score():
......
......@@ -18,6 +18,9 @@ import bob.kaldi
def test_ivector_train():
temp_dubm_file = bob.io.base.test_utils.temporary_filename()
temp_fubm_file = bob.io.base.test_utils.temporary_filename()
temp_ivec_file = bob.io.base.test_utils.temporary_filename()
sample = pkg_resources.resource_filename(__name__, 'data/sample16k.wav')
data = bob.io.audio.reader(sample)
......@@ -27,11 +30,11 @@ def test_ivector_train():
dubm = bob.kaldi.ubm_train(array, temp_dubm_file, num_gauss=2,
num_gselect=2, num_iters=2)
# Train small full GMM
ubm = bob.kaldi.ubm_full_train(array, temp_dubm_file,
num_gselect=2, num_iters=2)
fubm = bob.kaldi.ubm_full_train(array, dubm, temp_fubm_file,
num_gselect=2, num_iters=2)
# Train small ivector extractor
ivector = bob.kaldi.ivector_train(
array, temp_dubm_file, num_gselect=2, ivector_dim=20, num_iters=2)
ivector = bob.kaldi.ivector_train(array, fubm, temp_ivec_file,
num_gselect=2, ivector_dim=20, num_iters=2)
assert os.path.exists(ivector)
......@@ -39,6 +42,9 @@ def test_ivector_train():
def test_ivector_extract():
temp_dubm_file = bob.io.base.test_utils.temporary_filename()
temp_fubm_file = bob.io.base.test_utils.temporary_filename()
temp_ivec_file = bob.io.base.test_utils.temporary_filename()
sample = pkg_resources.resource_filename(__name__, 'data/sample16k.wav')
reference = pkg_resources.resource_filename(
__name__, 'data/sample16k.ivector')
......@@ -50,13 +56,13 @@ def test_ivector_extract():
dubm = bob.kaldi.ubm_train(array, temp_dubm_file, num_gauss=2,
num_gselect=2, num_iters=2)
# Train small full GMM
ubm = bob.kaldi.ubm_full_train(array, temp_dubm_file,
fubm = bob.kaldi.ubm_full_train(array, dubm, temp_fubm_file,
num_gselect=2, num_iters=2)
# Train small ivector extractor
ivector = bob.kaldi.ivector_train(
array, temp_dubm_file, num_gselect=2, ivector_dim=20, num_iters=2)
ivector = bob.kaldi.ivector_train(array, fubm, temp_ivec_file,
num_gselect=2, ivector_dim=20, num_iters=2)
# Extract ivector
ivector_array = bob.kaldi.ivector_extract(array, temp_dubm_file,
ivector_array = bob.kaldi.ivector_extract(array, fubm, ivector,
num_gselect=2)
theirs = np.loadtxt(reference)
......
......@@ -39,6 +39,20 @@ the filename as :obj:`str`:
>>> print (feat.shape)
(317, 39)
Voice Activity Detection (VAD)
------------------------------
A simple energy-based VAD is implemented in :py:func:`bob.kaldi.compute_vad`.
The function expects the speech samples as a :obj:`numpy.ndarray` and the sampling
rate as a :obj:`float`, and returns a :obj:`numpy.ndarray` of VAD labels, one
label per speech frame, each either 0 (zero) or 1 (one):
.. doctest::
>>> VAD_labels = bob.kaldi.compute_vad(data.load()[0], data.rate)
>>> print (len(VAD_labels))
317
UBM training and evaluation
---------------------------
......@@ -49,18 +63,17 @@ are supported, speakers can be enrolled and scored:
.. doctest::
>>> # Train small diagonal GMM
>>> projector = tempfile.NamedTemporaryFile()
>>> dubm = bob.kaldi.ubm_train(feat, projector.name, num_gauss=2, num_gselect=2, num_iters=2)
>>> diag_gmm_file = tempfile.NamedTemporaryFile()
>>> full_gmm_file = tempfile.NamedTemporaryFile()
>>> dubm = bob.kaldi.ubm_train(feat, diag_gmm_file.name, num_gauss=2, num_gselect=2, num_iters=2)
>>> # Train small full GMM
>>> ubm = bob.kaldi.ubm_full_train(feat, projector.name, num_gselect=2, num_iters=2)
>>> ubm = bob.kaldi.ubm_full_train(feat, dubm, full_gmm_file.name, num_gselect=2, num_iters=2)
>>> # Enrollment - MAP adaptation of the UBM-GMM
>>> spk_model = bob.kaldi.ubm_enroll(feat, dubm)
>>> # GMM scoring
>>> score = bob.kaldi.gmm_score(feat, spk_model, dubm)
>>> print ('%.3f' % score)
0.282
>>> os.unlink(projector.name + '.dubm')
>>> os.unlink(projector.name + '.fubm')
The following guide describes how to run complete speaker recognition experiments:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment