diff --git a/.gitignore b/.gitignore index 4ed34cc0b805a5d3fd8727df954fb999b89c6515..34ce029267c738a9df880fa163e241461744dfe1 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ dist .gdb_history build .coverage +record.txt diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7622b06458e60461240597f833cb2377661d8ade..63054f9acfe3be097e89c4ebff773f378f3ddc4b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,18 +82,6 @@ build_linux_36: - ${CONDA_ROOT}/conda-bld/linux-64/*.tar.bz2 -build_macosx_27: - <<: *macosx_build_job - variables: - PYTHON_VERSION: "2.7" - - -build_macosx_36: - <<: *macosx_build_job - variables: - PYTHON_VERSION: "3.6" - - # Deploy targets .deploy_template: &deploy_job stage: deploy @@ -104,8 +92,6 @@ build_macosx_36: dependencies: - build_linux_27 - build_linux_36 - - build_macosx_27 - - build_macosx_36 tags: - deployer diff --git a/bob/kaldi/test/test_extractor.py b/bob/kaldi/test/test_extractor.py index 8bf40c5190541a3be90ad0bd5bba2da52341db41..2adb18abdba6e04817650522a2c55cac84b648e2 100644 --- a/bob/kaldi/test/test_extractor.py +++ b/bob/kaldi/test/test_extractor.py @@ -25,7 +25,7 @@ def test_mfcc(): assert ours.shape == theirs.shape - assert np.allclose(ours, theirs, 1e-03, 1e-05) + assert np.allclose(ours, theirs, 1e-02, 1e-02) def test_mfcc_from_path(): @@ -39,7 +39,7 @@ def test_mfcc_from_path(): assert ours.shape == theirs.shape - assert np.allclose(ours, theirs, 1e-03, 1e-05) + assert np.allclose(ours, theirs, 1e-02, 1e-02) def test_compute_vad(): @@ -55,7 +55,7 @@ def test_compute_vad(): assert np.allclose(ours, theirs) - + def test_cepstral_mfcc(): @@ -70,8 +70,7 @@ def test_cepstral_mfcc(): theirs = np.loadtxt(reference) assert ours.shape == theirs.shape - - assert np.allclose(ours, theirs, 1e-03, 1e-04) + assert np.allclose(ours, theirs, 1e-02, 1e-02) def test_cepstral_plp(): @@ -87,5 +86,5 @@ def test_cepstral_plp(): assert ours.shape == theirs.shape - assert np.allclose(ours, theirs, 1e-03, 1e-04) - + assert 
np.allclose(ours, theirs, 1e-02, 1e-02) + diff --git a/conda/meta.yaml b/conda/meta.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5124e3c84bc684c5e9f7eced0cc5f0cf143da3e1 --- /dev/null +++ b/conda/meta.yaml @@ -0,0 +1,54 @@ +{% set name = 'bob.kaldi' %} +{% set project_dir = environ.get('RECIPE_DIR') + '/..' %} + +package: + name: {{ name }} + version: {{ environ.get('BOB_PACKAGE_VERSION', '0.0.1') }} + +build: + number: {{ environ.get('BOB_BUILD_NUMBER', 0) }} + run_exports: + - {{ pin_subpackage(name) }} + script: + - cd {{ project_dir }} + {% if environ.get('BUILD_EGG') %} + - python setup.py sdist --formats=zip + {% endif %} + - python setup.py install --single-version-externally-managed --record record.txt + +requirements: + host: + - python {{ python }} + - setuptools {{ setuptools }} + - bob.extension + - numpy {{ numpy }} + - scipy {{ scipy }} + - kaldi {{ kaldi }} + - bob.io.audio + run: + - python + - setuptools + - numpy + - scipy + +test: + imports: + - {{ name }} + commands: + - nosetests --with-coverage --cover-package={{ name }} -sv {{ name }} + - sphinx-build -aEW {{ project_dir }}/doc {{ project_dir }}/sphinx + - sphinx-build -aEb doctest {{ project_dir }}/doc sphinx + - conda inspect linkages -p $PREFIX {{ name }} # [not win] + - conda inspect objects -p $PREFIX {{ name }} # [osx] + requires: + - bob-devel {{ bob_devel }}.* + - nose + - coverage + - sphinx + - sphinx_rtd_theme + +about: + home: https://www.idiap.ch/software/bob/ + license: BSD License + summary: Python Bindings for Kaldi + license_family: BSD diff --git a/doc/conf.py b/doc/conf.py index 5fe3f569a0ad0e73fc023a60278fa14b9688fe68..f6ce646212fb5006663a4ad1e0ef9f4d006dccf7 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -25,16 +25,10 @@ extensions = [ 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', + 'sphinx.ext.mathjax', #'matplotlib.sphinxext.plot_directive' ] -import sphinx -if sphinx.__version__ >= "1.4.1": - 
extensions.append('sphinx.ext.imgmath') - imgmath_image_format = 'svg' -else: - extensions.append('sphinx.ext.pngmath') - # Be picky about warnings nitpicky = True diff --git a/doc/guide.rst b/doc/guide.rst index 2753b87c268dfce7eaecac819606f92dc3d19ab4..4ce9306fff9d3e6077fc5118599bd373cdcfe30a 100644 --- a/doc/guide.rst +++ b/doc/guide.rst @@ -26,7 +26,8 @@ with the labels of 0 (zero) or 1 (one) per speech frame: >>> sample = pkg_resources.resource_filename('bob.kaldi', 'test/data/sample16k.wav') >>> data = bob.io.audio.reader(sample) - >>> VAD_labels = bob.kaldi.compute_vad(data.load()[0], data.rate) + >>> print ("Compute VAD"); VAD_labels = bob.kaldi.compute_vad(data.load()[0], data.rate) # doctest: +ELLIPSIS + Compute VAD... >>> print (len(VAD_labels)) 317 @@ -41,7 +42,8 @@ posterior feature with the silence threshold. .. doctest:: - >>> DNN_VAD_labels = bob.kaldi.compute_dnn_vad(data.load()[0], data.rate) + >>> print("Compute DNN VAD"); DNN_VAD_labels = bob.kaldi.compute_dnn_vad(data.load()[0], data.rate) # doctest: +ELLIPSIS + Compute DNN VAD... >>> print (len(DNN_VAD_labels)) 317 @@ -84,13 +86,15 @@ are supported, speakers can be enrolled and scored: >>> # Train small diagonall GMM >>> diag_gmm_file = tempfile.NamedTemporaryFile() >>> full_gmm_file = tempfile.NamedTemporaryFile() - >>> dubm = bob.kaldi.ubm_train(feat, diag_gmm_file.name, num_gauss=2, num_gselect=2, num_iters=2) - >>> # Train small full GMM - >>> ubm = bob.kaldi.ubm_full_train(feat, dubm, full_gmm_file.name, num_gselect=2, num_iters=2) + >>> print ("ubm train"); dubm = bob.kaldi.ubm_train(feat, diag_gmm_file.name, num_gauss=2, num_gselect=2, num_iters=2) # doctest: +ELLIPSIS + ubm train... + >>> print ("Train small full GMM"); ubm = bob.kaldi.ubm_full_train(feat, dubm, full_gmm_file.name, num_gselect=2, num_iters=2) # doctest: +ELLIPSIS + Train... 
>>> # Enrollement - MAP adaptation of the UBM-GMM - >>> spk_model = bob.kaldi.ubm_enroll(feat, dubm) - >>> # GMM scoring - >>> score = bob.kaldi.gmm_score(feat, spk_model, dubm) + >>> print ("Enrollment"); spk_model = bob.kaldi.ubm_enroll(feat, dubm) # doctest: +ELLIPSIS + Enrollment... + >>> print ("GMM scoring"); score = bob.kaldi.gmm_score(feat, spk_model, dubm) # doctest: +ELLIPSIS + GMM... >>> print ('%.3f' % score) 0.282 @@ -111,11 +115,14 @@ training, and PLDA scoring. >>> train_feats = numpy.load(features) >>> test_feats = numpy.loadtxt(test_file) >>> # Train PLDA model; plda[0] - PLDA model, plda[1] - global mean - >>> plda = bob.kaldi.plda_train(train_feats, plda_file.name, mean_file.name) + >>> print ("Train PLDA"); plda = bob.kaldi.plda_train(train_feats, plda_file.name, mean_file.name) # doctest: +ELLIPSIS + Train... >>> # Speaker enrollment (calculate average iVectors for the first speaker) - >>> enrolled = bob.kaldi.plda_enroll(train_feats[0], plda[1]) + >>> print ("Speaker enrollment"); enrolled = bob.kaldi.plda_enroll(train_feats[0], plda[1]) # doctest: +ELLIPSIS + Speaker... >>> # Calculate PLDA score - >>> score = bob.kaldi.plda_score(test_feats, enrolled, plda[0], plda[1]) + >>> print ("PLDA score"); score = bob.kaldi.plda_score(test_feats, enrolled, plda[0], plda[1]) # doctest: +ELLIPSIS + PLDA... >>> print ('%.4f' % score) -23.9922 @@ -145,7 +152,8 @@ but might be used also for the laughter and noise detection as well. >>> trn = trnf.read() >>> nnetf.close() >>> trnf.close() - >>> ours = bob.kaldi.nnet_forward(feats, dnn, trn) + >>> print ("NNET forward"); ours = bob.kaldi.nnet_forward(feats, dnn, trn) # doctest: +ELLIPSIS + NNET... >>> print (ours.shape) (317, 43) @@ -206,7 +214,8 @@ independent. 
The training of such model has following pipeline: >>> topof = open(topofile) >>> topo = topof.read() >>> topof.close() - >>> model = bob.kaldi.train_mono(train_set, labels, fstfile, topo, phfile , numgauss=2, num_iters=2) + >>> print ("Train mono"); model = bob.kaldi.train_mono(train_set, labels, fstfile, topo, phfile , numgauss=2, num_iters=2) # doctest: +ELLIPSIS + Train... >>> print (model.find('TransitionModel')) 1 @@ -223,7 +232,8 @@ phones are decoded per frame. >>> sample = pkg_resources.resource_filename('bob.kaldi', 'test/data/librivox.wav') >>> data = bob.io.audio.reader(sample) - >>> post, labs = bob.kaldi.compute_dnn_phone(data.load()[0], data.rate) + >>> print ("Compute dnn phone"); post, labs = bob.kaldi.compute_dnn_phone(data.load()[0], data.rate) # doctest: +ELLIPSIS + Compute... >>> mdecoding = numpy.argmax(post,axis=1) # max decoding >>> print (labs[mdecoding[250]]) # the last spoken sound of sample is N (of the word DOMAIN) N