Commit 5ef8b484 authored by Milos CERNAK's avatar Milos CERNAK

Add mfcc, plp with utt-based cmvn; add AMI DNN and DNN-VAD.

parent e3d59dd5
Pipeline #11925 failed with stages
in 5 minutes and 45 seconds
include LICENSE README.rst buildout.cfg develop.cfg version.txt requirements.txt
recursive-include doc conf.py *.rst
recursive-include bob/kaldi/test/dnn *.txt
recursive-include bob/kaldi/test/data *.wav *.txt *.npy *.ivector *.ie
from .mfcc import mfcc
from .mfcc import mfcc_from_path
from .mfcc import compute_vad
from .cepstral import cepstral
from .gmm import ubm_train
from .gmm import ubm_full_train
from .gmm import ubm_enroll
......@@ -11,7 +12,8 @@ from .ivector import ivector_extract
from .ivector import plda_train
from .ivector import plda_enroll
from .ivector import plda_score
from .dnn import nnet_forward
from .dnn import compute_dnn_vad
def get_config():
"""Returns a string containing the configuration information.
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Milos Cernak <milos.cernak@idiap.ch>
# August 28, 2017
import os
import numpy as np
from . import io
from subprocess import PIPE, Popen
from os.path import isfile
import tempfile
# import shutil
import logging
logger = logging.getLogger(__name__)
def cepstral(data, cepstral_type, rate=8000,
             preemphasis_coefficient=0.97, raw_energy=True, delta_order=2,
             frame_length=25, frame_shift=10, num_ceps=13,
             num_mel_bins=23, cepstral_lifter=22, low_freq=20,
             high_freq=0, dither=1.0, snip_edges=True, normalization=True):
    """Computes the cepstral (mfcc/plp) features for given speech samples.

    The samples are piped through four Kaldi command-line tools:
    ``compute-<cepstral_type>-feats`` (static features),
    ``compute-cmvn-stats`` and ``apply-cmvn`` (per-utterance mean/variance
    normalisation) and ``add-deltas``.

    Parameters
    ----------

    data : numpy.ndarray
        A 1D numpy ndarray object containing 64-bit float numbers with
        the audio signal to calculate the cepstral features from. The
        samples are expected in [-1, 1] (see ``normalization``). The
        caller's array is never modified.

    cepstral_type: str
        The type of cepstral features: ``'mfcc'`` or ``'plp'``.

    rate : float
        The sampling rate of the input signal in ``data``.

    preemphasis_coefficient : :obj:`float`, optional
        Coefficient for use in signal preemphasis

    raw_energy : :obj:`bool`, optional
        If true, compute energy before preemphasis and windowing

    delta_order : :obj:`int`, optional
        Add deltas to raw mfcc or plp features

    frame_length : :obj:`int`, optional
        Frame length in milliseconds

    frame_shift : :obj:`int`, optional
        Frame shift in milliseconds

    num_ceps : :obj:`int`, optional
        Number of cepstra in MFCC computation (including C0)

    num_mel_bins : :obj:`int`, optional
        Number of triangular mel-frequency bins

    cepstral_lifter : :obj:`int`, optional
        Constant that controls scaling of MFCCs

    low_freq : :obj:`int`, optional
        Low cutoff frequency for mel bins

    high_freq : :obj:`int`, optional
        High cutoff frequency for mel bins (if < 0, offset from Nyquist)

    dither : :obj:`float`, optional
        Dithering constant (0.0 means no dither)

    snip_edges : :obj:`bool`, optional
        If true, end effects will be handled by outputting only frames
        that completely fit in the file, and the number of frames
        depends on the frame-length. If false, the number of frames
        depends only on the frame-shift, and we reflect the data at
        the ends.

    normalization : :obj:`bool`, optional
        If true, a copy of ``data`` is scaled by its peak absolute value
        so that the samples lie in [-1, 1]. An all-zero signal is left
        untouched.

    Returns
    -------

    numpy.ndarray
        The cepstral features calculated for the input signal (2D
        array of 32-bit floats).

    Raises
    ------

    ValueError
        If ``cepstral_type`` is neither ``'mfcc'`` nor ``'plp'``.

    """
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if cepstral_type not in ('mfcc', 'plp'):
        raise ValueError(
            "cepstral_type must be 'mfcc' or 'plp', got %r" % (cepstral_type,))

    feats_cmd = [
        'compute-' + cepstral_type + '-feats',
        '--sample-frequency=' + str(rate),
        '--preemphasis-coefficient=' + str(preemphasis_coefficient),
        '--raw-energy=' + str(raw_energy).lower(),
        '--frame-length=' + str(frame_length),
        '--frame-shift=' + str(frame_shift),
        '--num-ceps=' + str(num_ceps),
        '--num-mel-bins=' + str(num_mel_bins),
        '--cepstral-lifter=' + str(cepstral_lifter),
        # These two were accepted and documented but never forwarded.
        '--low-freq=' + str(low_freq),
        '--high-freq=' + str(high_freq),
        '--dither=' + str(dither),
        '--snip-edges=' + str(snip_edges).lower(),
        'ark:-',
        'ark:-',
    ]
    deltas_cmd = [
        'add-deltas',
        '--delta-order=' + str(delta_order),
        'ark:-',
        'ark:-',
    ]

    if normalization:
        peak = np.max(np.abs(data), axis=0)
        # Divide into a new array: an in-place ``/=`` would silently alter
        # the caller's samples. Skip silent input to avoid 0/0 -> NaN.
        if np.all(peak > 0):
            data = data / peak

    # Compute static features
    with open(os.devnull, "w") as fnull:
        pipe1 = Popen(feats_cmd, stdin=PIPE, stdout=PIPE, stderr=fnull)
        # write wav file name (as if it were a Kaldi ark file)
        pipe1.stdin.write(b'abc ')
        # write WAV file in 16-bit format
        io.write_wav(pipe1.stdin, data, rate)
        pipe1.stdin.close()
        feats = [mat for _, mat in io.read_mat_ark(pipe1.stdout)][0]
        # Reap the child so it does not linger as a zombie.
        pipe1.stdout.close()
        pipe1.wait()
    assert len(feats)

    # Compute per-utterance CMVN stats, apply them, then add deltas
    with tempfile.NamedTemporaryFile(suffix='.cmvn') as cmvnfile, \
            open(os.devnull, "w") as fnull:
        # The stats must go through the temporary file: the second pass
        # (apply-cmvn) needs them before it sees the features again.
        cmvn_ark = 'ark:' + cmvnfile.name

        pipe2 = Popen(['compute-cmvn-stats', 'ark:-', cmvn_ark],
                      stdin=PIPE, stdout=PIPE, stderr=fnull)
        io.write_mat(pipe2.stdin, feats, key=b'abc')
        # communicate() closes stdin and waits for the stats to be written
        pipe2.communicate()

        pipe3 = Popen(['apply-cmvn', cmvn_ark, 'ark:-', 'ark:-'],
                      stdin=PIPE, stdout=PIPE, stderr=fnull)
        pipe4 = Popen(deltas_cmd, stdin=pipe3.stdout, stdout=PIPE,
                      stderr=fnull)
        # Drop our copy of the read end so pipe3 gets SIGPIPE if pipe4 dies.
        pipe3.stdout.close()
        io.write_mat(pipe3.stdin, feats, key=b'abc')
        pipe3.stdin.close()
        ret = [mat for _, mat in io.read_mat_ark(pipe4.stdout)][0]
        pipe4.stdout.close()
        pipe3.wait()
        pipe4.wait()

    return ret
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Milos Cernak <milos.cernak@idiap.ch>
# August 31, 2017
import os
import numpy as np
from . import io
from subprocess import PIPE, Popen
from os.path import isfile
import tempfile
# import shutil
import logging
import pkg_resources
import bob.kaldi
logger = logging.getLogger(__name__)
def nnet_forward(feats, nnet, feats_transform='', apply_log=False,
                 no_softmax=False, prior_floor=1e-10, prior_scale=1,
                 use_gpu=False):
    """Computes the forward pass for given features.

    The model (and the optional feature transform) are written to
    temporary files, the Kaldi ``nnet-forward`` binary is run on
    ``feats``, and the temporary files are removed again even when the
    run fails. The binary's stderr is logged at DEBUG level.

    Parameters
    ----------

    feats: numpy.ndarray
        The input cepstral features (2D array of 32-bit floats).

    nnet: str
        The neural network, as the text content of a model file.

    feats_transform : :obj:`str`, optional
        The input feature transform for ``feats``, as the text content
        of a model file. An empty string disables the transform.

    apply_log : :obj:`bool`, optional
        Transform NN output by log().

    no_softmax : :obj:`bool`, optional
        Removes the last component with Softmax.

    prior_floor : :obj:`float`, optional
        Flooring constant for prior probability.

    prior_scale : :obj:`float`, optional
        Scaling factor to be applied on pdf-log-priors.

    use_gpu : :obj:`bool`, optional
        Compute forward pass on GPU.

    Returns
    -------

    numpy.ndarray
        The posterior features.

    """
    cmd = [
        'nnet-forward',
        '--apply-log=' + str(apply_log).lower(),
        '--no-softmax=' + str(no_softmax).lower(),
        '--prior-floor=' + str(prior_floor),
        '--prior-scale=' + str(prior_scale),
        '--use-gpu=' + str(use_gpu).lower(),
    ]

    # Every temporary model file is recorded here as soon as it exists so
    # the ``finally`` below can remove it whatever happens afterwards.
    tmp_paths = []
    try:
        if feats_transform != '':
            # save nnet transform model to a file
            with tempfile.NamedTemporaryFile(
                    mode='w', delete=False, suffix='.nnet') as transf:
                tmp_paths.append(transf.name)
                transf.write(feats_transform)
            cmd.append('--feature-transform=' + transf.name)

        # save nnet model to a file
        with tempfile.NamedTemporaryFile(
                mode='w', delete=False, suffix='.nnet') as dnn:
            tmp_paths.append(dnn.name)
            dnn.write(nnet)
        cmd += [dnn.name, 'ark:-', 'ark:-']

        with tempfile.NamedTemporaryFile(suffix='.log') as logfile:
            pipe = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=logfile)
            try:
                io.write_mat(pipe.stdin, feats, key=b'abc')
                pipe.stdin.close()
                posts = [mat for _, mat in io.read_mat_ark(pipe.stdout)][0]
            finally:
                # Close both ends before waiting: a child still blocked on
                # stdin would otherwise never exit. A broken pipe here only
                # means the child is already gone.
                for stream in (pipe.stdin, pipe.stdout):
                    try:
                        stream.close()
                    except (IOError, OSError):
                        pass
                pipe.wait()
                # Emit the log on failure too; that is when it matters most.
                with open(logfile.name) as fp:
                    logger.debug("%s", fp.read())
    finally:
        for path in tmp_paths:
            os.unlink(path)

    return posts
def compute_dnn_vad(samples, rate, silence_threshold=0.9, posterior=0):
    """Performs DNN-based Voice Activity Detection on speech samples

    MFCC features are extracted from ``samples`` (without input
    normalization), passed through the packaged AMI network
    (``test/dnn/ami.nnet.txt`` with ``test/dnn/ami.feature_transform.txt``)
    and one posterior column is thresholded frame by frame.

    Parameters
    ----------

    samples : numpy.ndarray
        The audio signal, handed unchanged to :py:func:`bob.kaldi.cepstral`.

    rate : float
        The sampling rate of the input signal in ``samples``.

    silence_threshold: :obj:`float`, optional
        Threshold the selected posterior must exceed for a frame to be
        labelled ``1.0``.

    posterior: :obj:`int`, optional
        Index of posterior feature to be used for detection. Useful
        ones are 0, 1 and 2, for silence, laughter and noise,
        respectively.

    Returns
    -------

    list
        One float label per frame: ``1.0`` where the selected posterior
        is strictly greater than ``silence_threshold``, ``0.0`` elsewhere.

    """
    # NOTE(review): with the default ``posterior=0`` (silence) a label of
    # 1.0 marks frames whose *silence* posterior is high -- confirm this is
    # the intended polarity for "voiced" labels.
    model_path = pkg_resources.resource_filename(
        __name__, 'test/dnn/ami.nnet.txt')
    transform_path = pkg_resources.resource_filename(
        __name__, 'test/dnn/ami.feature_transform.txt')

    mfcc = bob.kaldi.cepstral(samples, 'mfcc', rate, normalization=False)

    with open(model_path) as model_file:
        model_text = model_file.read()
    with open(transform_path) as transform_file:
        transform_text = transform_file.read()

    frame_posteriors = bob.kaldi.nnet_forward(mfcc, model_text,
                                              transform_text)

    return [1.0 if frame[posterior] > silence_threshold else 0.0
            for frame in frame_posteriors]
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
0
0
0
0
1
1
1
0
0
0
0
1
1
0
0
1
1
0
0
0
1
1
1
0
0
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
0
0
1
1
0
0
0
0
0
0
1
0
1
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
1
1
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
0
0
0
0
1
1
1
0
0
0
0
0
0
0
1
1
1
0
1
1
1
1
1
0
0
0
0
0
0
0
1
1
1
1
0
1
1
0
1
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
This source diff could not be displayed because it is too large. You can view the blob instead.
<Nnet>
<Splice> 351 39
[ -4 -3 -2 -1 0 1 2 3 4 ]
<!EndOfComponent>
<AddShift> 351 351
<LearnRateCoef> 0 [ 0.07302087 0.0235805 -0.01384947 -0.01625082 0.002956068 0.008314143 0.004086975 0.01730765 0.1074382 0.052967 0.001973744 0.05226177 -0.01387728 -0.03906755 -0.035242 -0.001233529 -0.01802372 0.008243618 0.005789944 0.009321066 0.007390833 -0.004728262 -0.002123247 0.004100703 0.0005919941 0.00310682 -0.006999172 -0.005040503 0.0008500413 -0.002566969 0.003218116 0.002033148 0.001549504 0.002062428 0.0001206222 -1.035119e-05 -0.000122161 -0.0005453216 0.0009008246 0.05882392 0.01133008 -0.01059108 -0.02196749 0.003566717 0.008457286 0.006791435 0.01715907 0.1003676 0.04850281 0.002643854 0.05028832 -0.0162125 -0.0299234 -0.02726331 -0.0008800918 -0.01388723 0.005174594 0.002923323 0.006765986 0.004464419 -0.005038093 -0.002988146 0.002805253 -4.217767e-05 0.001462952 -0.003416326 -0.00217222 0.0007064922 -0.001068163 0.00185232 0.001018776 0.0007152287 0.0009800895 9.742384e-05 -0.0001969778 -0.0003832079 -0.0005638678 0.0003931217 0.04733803 0.001032056 -0.008381872 -0.02677491 0.002969651 0.006956617 0.008805118 0.01605053 0.09372687 0.04375516 0.002785146 0.04821709 -0.0188819 -0.0207152 -0.01910599 -0.0002937955 -0.009622013 0.002228954 0.0002955265 0.004238415 0.001587092 -0.005327369 -0.003876561 0.001415257 -0.0008379887 -0.0001819952 -9.32336e-05 0.0005023094 0.0006736687 0.000333837 0.000573704 0.0001699949 -8.26118e-05 3.039412e-06 2.354745e-05 -0.0003209513 -0.000586114 -0.0005851098 -9.654056e-05 0.03884477 -0.007264752 -0.007098787 -0.03070732 0.001070614 0.004289177 0.009813774 0.01362119 0.08757126 0.0386314 0.002385832 0.04614868 -0.02156824 -0.01203297 -0.01127821 0.0006619566 -0.005472397 -0.0004654869 -0.001862671 0.001915545 -0.001083196 -0.005648137 -0.004672528 7.495232e-05 -0.001745021 -0.001818788 0.002939682 0.00295497 0.0006877846 0.001613577 -0.0006188994 -0.0005921877 -0.0008205423 -0.0008770141 -0.000101541 -0.0004045696 -0.0007194701 -0.0005798393 -0.0005943631 0.03301426 -0.01298365 -0.006175284 -0.03347287 
-0.001584254 0.0005502061 0.009796665 0.01072404 0.08192617 0.03366156 0.001739865 0.04400891 -0.02436568 -0.00419222 -0.004041019 0.001920158 -0.001592133 -0.002788188 -0.003575428 -0.0001686669 -0.003429411 -0.006103384 -0.005374189 -0.001105529 -0.002632777 -0.003387051 0.005819394 0.005235938 0.0006548844 0.002803291 -0.001797468 -0.001372183 -0.001513056 -0.001737782 -0.0002628193 -0.0004929031 -0.0008157415 -0.0005276073 -0.001086891 0.02718375 -0.01870256 -0.005251781 -0.03623843 -0.004239123 -0.003188765 0.009779557 0.007826886 0.07628108 0.02869171 0.001093897 0.04186914 -0.02716313 0.003648532 0.003196168 0.003178359 0.002288131 -0.005110888 -0.005288186 -0.002252879 -0.005775627 -0.006558631 -0.00607585 -0.002286009 -0.003520534 -0.004955315 0.008699105 0.007516906 0.0006219844 0.003993004 -0.002976038 -0.002152178 -0.00220557 -0.00259855 -0.0004240976 -0.0005812366 -0.0009120128 -0.0004753753 -0.001579419 0.03220744 -0.01345414 0.0002954118 -0.03197001 -0.008316422 -0.007181557 0.007978892 0.001800622 0.06885503 0.02185406 -0.002096529 0.03719297 -0.03301971 0.01608781 0.01386591 0.003588696 0.007754588 -0.00958896 -0.008736921 -0.005610386 -0.009542064 -0.006943207 -0.0068794 -0.003596836 -0.004025334 -0.007017772 0.01070777 0.008566376 -0.0003255951 0.004494406 -0.004089253 -0.003037308 -0.002669497 -0.003216664 -0.0002267958 -0.0004029166 -0.00063859 -2.077999e-05 -0.001675183 0.05316684 0.005661163 0.006663568 -0.02147653 -0.01943786 -0.01658334 0.0009761757 -0.009240531 0.06057328 0.01445504 -0.006462085 0.03198684 -0.0419453 0.03106612 0.02551109 0.00201493 0.01365144 -0.01557967 -0.01363296 -0.009558871 -0.01413547 -0.006802708 -0.00744251 -0.004488339 -0.003738501 -0.008965097 0.01066717 0.007614315 -0.001702024 0.004050317 -0.004350182 -0.003346164 -0.002492959 -0.003199676 0.000249924 -6.897183e-06 -8.859586e-05 0.0005787262 -0.001330054 0.08915102 0.03500578 0.008381923 -0.006164612 -0.03781408 -0.03196051 -0.009791993 -0.02487535 0.05307275 
0.006827267 -0.01089181 0.02896047 -0.05180735 0.0447268 0.03463959 -0.001902163 0.01827253 -0.02154731 -0.01893537 -0.01303503 -0.01838589 -0.005662655 -0.007315661 -0.004494661 -0.002522763 -0.01005786 0.008555398 0.005140978 -0.002646815 0.00287385 -0.003519002 -0.002783834 -0.001748739 -0.00250811 0.0007176428 0.0004648716 0.0004929361 0.0009396413 -0.0008155004 ]
<!EndOfComponent>
<Rescale> 351 351
<LearnRateCoef> 0 [ 0.08078318 0.06096808 0.07933143 0.06777273 0.07074793 0.07174462 0.07259293 0.07203876 0.07743771 0.07611391 0.0827964 0.08748642 0.09801512 0.4398758 0.292906 0.3348854 0.3244864 0.3042457 0.2967722 0.3063778 0.2905585 0.3084474 0.3062362 0.3307252 0.3471294 0.3826901 1.126455 0.7725869 0.8177018 0.8113005 0.7432443 0.7152259 0.7318503 0.6896848 0.7279865 0.7254696 0.7754512 0.8158533 0.8940132 0.08084361 0.06096557 0.07931527 0.06775793 0.07072358 0.07171571 0.07256709 0.07202239 0.07740559 0.07608894 0.08277318 0.08746568 0.09799299 0.4403423 0.2931458 0.3349563 0.3246298 0.3042938 0.2967868 0.3064012 0.2905694 0.3084794 0.3062448 0.3307367 0.3471606 0.382733 1.129486 0.7735071 0.8176212 0.8114882 0.7430896 0.7150376 0.7315465 0.6893305 0.7275209 0.7250262 0.7749298 0.8153241 0.8933406 0.08089885 0.06096607 0.07930301 0.06774576 0.07069873 0.07168649 0.07254004 0.07200579 0.07737518 0.07606556 0.08275098 0.08744671 0.09797359 0.4407612 0.2933591 0.3350125 0.3247608 0.3043203 0.2967813 0.3064161 0.2905535 0.3084687 0.3062499 0.3307337 0.3471802 0.3827583 1.132753 0.7745767 0.8177076 0.8118495 0.7431031 0.7149858 0.7313443 0.6891336 0.7272172 0.7246923 0.7744984 0.8149316 0.8928113 0.0809462 0.06097085 0.07929435 0.06773707 0.0706773 0.07166119 0.07251572 0.07198936 0.07734919 0.07604401 0.08273098 0.08743161 0.09795509 0.4412819 0.2936083 0.3351088 0.3249089 0.3043527 0.2967887 0.3064248 0.290536 0.3084328 0.3062267 0.3307001 0.3471632 0.3827292 1.136265 0.775835 0.8180715 0.8125442 0.7434538 0.7152567 0.7315122 0.6892576 0.7272912 0.7247251 0.7744896 0.8149842 0.8928118 0.08098388 0.06097905 0.0792919 0.06773242 0.07065843 0.07163615 0.07249085 0.0719741 0.07732397 0.07602958 0.08271485 0.08742099 0.09793857 0.4420364 0.2939727 0.3353621 0.3251995 0.3045243 0.2969367 0.3065682 0.2906579 0.3085361 0.3063282 0.3308067 0.347298 0.3828599 1.139751 0.7770842 0.8185292 0.8133532 0.7439222 0.7156461 0.7318692 0.6895365 0.7275417 0.7249857 0.7747393 
0.8153117 0.8931419 0.08102163 0.06098727 0.07928945 0.06772775 0.07063958 0.07161113 0.07246599 0.07195883 0.07729878 0.07601517 0.08269873 0.08741038 0.09792207 0.4428003 0.2943398 0.335616 0.3254915 0.3046964 0.2970849 0.3067119 0.2907801 0.3086395 0.3064298 0.3309133 0.347433 0.3829907 1.143283 0.7783418 0.8189875 0.8141653 0.744392 0.7160363 0.7322267 0.6898161 0.7277924 0.7252467 0.7749893 0.8156396 0.8934725 0.08101144 0.06099514 0.07931804 0.06773064 0.07063186 0.07159495 0.07244749 0.07194699 0.07728382 0.07601298 0.08268914 0.08741034 0.09791517 0.4449769 0.2952188 0.3361672 0.3262218 0.3051659 0.2975084 0.307135 0.2911405 0.3089855 0.3067611 0.3312765 0.3478601 0.3834351 1.146206 0.7792602 0.8197346 0.8148946 0.7450451 0.7166575 0.7327564 0.6902981 0.7282373 0.7257249 0.7754751 0.816219 0.8940891 0.08094215 0.06101002 0.0793747 0.06775392 0.07063782 0.07159191 0.07243584 0.07194046 0.07728469 0.07602531 0.08269496 0.08742859 0.09792184 0.4482685 0.2962975 0.3367847 0.3270929 0.3057499 0.2980366 0.3076147 0.2915687 0.3093514 0.3071257 0.3316468 0.3483257 0.3838967 1.148145 0.7803364 0.8212255 0.8159979 0.7460446 0.7176085 0.7336907 0.6911409 0.7291434 0.7266032 0.7764568 0.8173299 0.8952405 0.08084263 0.06103747 0.07943666 0.06779841 0.07065814 0.07160315 0.07243632 0.07194414 0.07730182 0.07605443 0.08272105 0.08746169 0.09795123 0.4513378 0.2971132 0.3374144 0.3277786 0.3063014 0.2985557 0.3080258 0.2919705 0.3096584 0.3074462 0.3319665 0.3487236 0.3842777 1.150439 0.7820504 0.8230242 0.8176472 0.7473024 0.7187643 0.734898 0.6921927 0.7303236 0.7277593 0.7776926 0.8187327 0.89671 ]
<!EndOfComponent>
</Nnet>
This diff is collapsed.
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Milos Cernak <milos.cernak@idiap.ch>
# September 1, 2017
'''Tests for Kaldi bindings'''
import pkg_resources
import numpy as np
import bob.io.audio
import bob.kaldi
def test_forward_pass():
    """Check nnet_forward posteriors on sample16k.wav against the reference.

    MFCCs are computed without input normalization, forwarded through the
    AMI network with its feature transform, and compared (shape and values)
    with ``data/sample16k-posteriors.txt``.
    """
    def resource(relative_path):
        # Resolve a file shipped next to this test module.
        return pkg_resources.resource_filename(__name__, relative_path)

    wav_path = resource('data/sample16k.wav')
    model_path = resource('dnn/ami.nnet.txt')
    transform_path = resource('dnn/ami.feature_transform.txt')
    expected_path = resource('data/sample16k-posteriors.txt')

    audio = bob.io.audio.reader(wav_path)
    mfcc = bob.kaldi.cepstral(audio.load()[0], 'mfcc', audio.rate,
                              normalization=False)

    with open(model_path) as model_file:
        model_text = model_file.read()
    with open(transform_path) as transform_file:
        transform_text = transform_file.read()

    posteriors = bob.kaldi.nnet_forward(mfcc, model_text, transform_text)
    expected = np.loadtxt(expected_path)

    assert posteriors.shape == expected.shape
    assert np.allclose(posteriors, expected, rtol=1e-03, atol=1e-05)
def test_compute_dnn_vad():
    """Check compute_dnn_vad labels on sample16k.wav against the reference.

    The per-frame labels must match ``data/sample16k-dnn-vad.txt``.
    """
    wav_path = pkg_resources.resource_filename(
        __name__, 'data/sample16k.wav')
    expected_path = pkg_resources.resource_filename(
        __name__, 'data/sample16k-dnn-vad.txt')

    audio = bob.io.audio.reader(wav_path)
    samples = audio.load()[0]

    labels = bob.kaldi.compute_dnn_vad(samples, audio.rate)
    expected = np.loadtxt(expected_path)

    assert np.allclose(labels, expected)
......@@ -54,3 +54,38 @@ def test_compute_vad():
theirs = np.loadtxt(reference)
assert np.allclose(ours, theirs)
def test_cepstral_mfcc():
sample = pkg_resources.resource_filename(__name__, 'data/sample16k.wav')
reference = pkg_resources.resource_filename(
__name__, 'data/sample16k-cepstral-mfcc.txt')
data = bob.io.audio.reader(sample)
ours = bob.kaldi.cepstral(data.load()[0], 'mfcc', data.rate,
normalization=False)
theirs = np.loadtxt(reference)
assert ours.shape == theirs.shape