Commit a700c4e4 authored by Milos CERNAK

Merge branch 'ivector-update' into 'master'

Add ivector+PLDA support

See merge request !5
parents 84de208d a92b3655
Pipeline #10546 passed with stages
in 8 minutes and 7 seconds
......@@ -332,6 +332,7 @@ def ubm_full_train(feats, dubm, fubmfile, num_gselect=20, num_iters=4,
delete=False, suffix='.dump') as estfile:
cmd5 += [
opt,
'--binary=false',
'--min-gaussian-weight=' +
str(min_gaussian_weight),
inModel,
......@@ -352,8 +353,10 @@ def ubm_full_train(feats, dubm, fubmfile, num_gselect=20, num_iters=4,
shutil.copyfile(estfile.name, fubmfile)
os.unlink(estfile.name)
os.unlink(dubmfile.name)
return fubmfile # ToDo : convert to a text format
with open(fubmfile) as fp:
fubmtxt = fp.read()
return fubmtxt
def ubm_enroll(feats, ubm):
"""Performs MAP adaptation of GMM-UBM model.
......
This diff is collapsed.
......@@ -45,10 +45,10 @@ def test_ubm_full_train():
dubm = bob.kaldi.ubm_train(array, temp_dubm_file, num_gauss=2,
num_gselect=2, num_iters=2)
# Train small full GMM
ubm = bob.kaldi.ubm_full_train(array, dubm, temp_fubm_file,
fubm = bob.kaldi.ubm_full_train(array, dubm, temp_fubm_file,
num_gselect=2, num_iters=2)
assert os.path.exists(ubm)
assert fubm.find('FullGMM')
def test_ubm_enroll():
......
......@@ -36,7 +36,7 @@ def test_ivector_train():
ivector = bob.kaldi.ivector_train(array, fubm, temp_ivec_file,
num_gselect=2, ivector_dim=20, num_iters=2)
assert os.path.exists(ivector)
assert ivector.find('IvectorExtractor')
def test_ivector_extract():
......@@ -72,36 +72,43 @@ def test_ivector_extract():
def test_plda_train():
temp_file = bob.io.base.test_utils.temporary_filename()
plda_file = bob.io.base.test_utils.temporary_filename()
mean_file = bob.io.base.test_utils.temporary_filename()
features = pkg_resources.resource_filename(
__name__, 'data/feats-mobio.npy')
feats = np.load(features)
# Train PLDA
plda = bob.kaldi.plda_train(feats, temp_file)
plda = bob.kaldi.plda_train(feats, plda_file, mean_file)
assert os.path.exists(temp_file + '.plda')
assert os.path.exists(temp_file + '.plda.mean')
assert plda[0].find('Plda')
assert os.path.exists(mean_file)
def test_plda_enroll():
temp_file = bob.io.base.test_utils.temporary_filename()
plda_file = bob.io.base.test_utils.temporary_filename()
mean_file = bob.io.base.test_utils.temporary_filename()
features = pkg_resources.resource_filename(
__name__, 'data/feats-mobio.npy')
feats = np.load(features)
# Train PLDA
plda = bob.kaldi.plda_enroll(feats, temp_file)
plda = bob.kaldi.plda_train(feats, plda_file, mean_file)
assert os.path.exists(plda)
# Enroll; plda[0] - PLDA model, plda[1] - PLDA global mean
enrolled = bob.kaldi.plda_enroll(feats[0], plda[1])
assert enrolled.find('spk36')
def test_plda_score():
temp_file = bob.io.base.test_utils.temporary_filename()
plda_file = bob.io.base.test_utils.temporary_filename()
mean_file = bob.io.base.test_utils.temporary_filename()
spk_file = bob.io.base.test_utils.temporary_filename()
test_file = pkg_resources.resource_filename(
__name__, 'data/test-mobio.ivector')
features = pkg_resources.resource_filename(
......@@ -111,10 +118,10 @@ def test_plda_score():
test_feats = np.loadtxt(test_file)
# Train PLDA
plda = bob.kaldi.plda_train(train_feats, temp_file)
# Enroll PLDA (average speaker)
enrolled = bob.kaldi.plda_enroll(train_feats[0], temp_file)
plda = bob.kaldi.plda_train(train_feats, plda_file, mean_file)
# Enroll PLDA (for the first speaker)
enrolled = bob.kaldi.plda_enroll(train_feats[0], plda[1])
# Score PLDA
score = bob.kaldi.plda_score(test_feats, enrolled, temp_file)
score = bob.kaldi.plda_score(test_feats, enrolled, plda[0], plda[1])
assert np.allclose(score, [-23.9922], 1e-03, 1e-05)
......@@ -7,11 +7,11 @@
import bob.kaldi
import bob.io.audio
import tempfile
import os
import numpy
=======================
Using Kaldi in Python
=======================
================================
Speaker recognition evaluation
================================
MFCC Extraction
---------------
......@@ -75,33 +75,30 @@ are supported, speakers can be enrolled and scored:
>>> print ('%.3f' % score)
0.282
The following guide describes how to run complete speaker recognition experiments:
iVector + PLDA training and evaluation
--------------------------------------
1. To run the UBM-GMM with MAP adaptation speaker recognition experiment, run:
The implementation is based on the Kaldi recipe SRE10_. It includes
ivector extractor training from full-diagonal GMMs, PLDA model
training, and PLDA scoring.
.. code-block:: sh
verify.py -d 'mobio-audio-male' -p 'energy-2gauss' -e 'mfcc-kaldi' -a 'gmm-kaldi' -s exp-gmm-kaldi --groups {dev,eval} -R '/your/work/directory/' -T '/your/temp/directory' -vv
2. To run the ivector+plda speaker recognition experiment, run:
.. code-block:: sh
verify.py -d 'mobio-audio-male' -p 'energy-2gauss' -e 'mfcc-kaldi' -a 'ivector-plda-kaldi' -s exp-ivector-plda-kaldi --groups {dev,eval} -R '/your/work/directory/' -T '/your/temp/directory' -vv
3. Results:
.. doctest::
+---------------------------------------------------+--------+--------+
| Experiment description | EER | HTER |
+---------------------------------------------------+--------+--------+
| -e 'mfcc-kaldi', -a 'gmm-kaldi', 100GMM | 18.53% | 14.52% |
+---------------------------------------------------+--------+--------+
| -e 'mfcc-kaldi', -a 'gmm-kaldi', 512GMM | 17.51% | 12.44% |
+---------------------------------------------------+--------+--------+
| -e 'mfcc-kaldi', -a 'ivector-plda-kaldi', 64GMM | 12.26% | 11.97% |
+---------------------------------------------------+--------+--------+
| -e 'mfcc-kaldi', -a 'ivector-plda-kaldi', 256GMM | 11.35% | 11.46% |
+---------------------------------------------------+--------+--------+
>>> plda_file = tempfile.NamedTemporaryFile()
>>> mean_file = tempfile.NamedTemporaryFile()
>>> spk_file = tempfile.NamedTemporaryFile()
>>> test_file = pkg_resources.resource_filename('bob.kaldi', 'test/data/test-mobio.ivector')
>>> features = pkg_resources.resource_filename('bob.kaldi', 'test/data/feats-mobio.npy')
>>> train_feats = numpy.load(features)
>>> test_feats = numpy.loadtxt(test_file)
>>> # Train PLDA model; plda[0] - PLDA model, plda[1] - global mean
>>> plda = bob.kaldi.plda_train(train_feats, plda_file.name, mean_file.name)
>>> # Speaker enrollment (calculate average iVectors for the first speaker)
>>> enrolled = bob.kaldi.plda_enroll(train_feats[0], plda[1])
>>> # Calculate PLDA score
>>> score = bob.kaldi.plda_score(test_feats, enrolled, plda[0], plda[1])
>>> print ('%.4f' % score)
-23.9922
.. include:: links.rst
......@@ -22,3 +22,4 @@
.. _zc.buildout: http://pypi.python.org/pypi/zc.buildout/
.. _mr.developer: http://pypi.python.org/pypi/mr.developer/
.. _kaldi: http://kaldi-asr.org/
.. _sre10: https://github.com/kaldi-asr/kaldi/tree/master/egs/sre10/v1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment