Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
L
lab-speaker-recognition
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
biometric-resources
lab-speaker-recognition
Commits
04a30197
Commit
04a30197
authored
Oct 13, 2018
by
Sushil Bhattacharjee
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
updated functions; added epc plot
parent
a6280e55
Pipeline
#24293
failed with stage
in 2 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
206 additions
and
88 deletions
+206
-88
notebooks/model.07_asv_intro.ipynb
notebooks/model.07_asv_intro.ipynb
+42
-50
notebooks/model.08_asv_expts.ipynb
notebooks/model.08_asv_expts.ipynb
+119
-35
notebooks/plots.py
notebooks/plots.py
+44
-2
notebooks/speaker_lib.py
notebooks/speaker_lib.py
+1
-1
No files found.
notebooks/model.07_asv_intro.ipynb
View file @
04a30197
...
...
@@ -14,38 +14,17 @@
"### With VoxForge Dataset: \n",
" - free, public dataset\n",
" - contains audio-recordings of 30 speakers (multiple recordings per speaker)\n",
" - short audio-recordings: ~3sec. long."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#To download data from VoxForge website uncomment and execute the following commands:\n",
"\n",
"#import webbrowser\n",
"#url = 'http://'+ 'www.voxforge.org/downloads'\n",
"#webbrowser.open(url)\n"
" - short audio-recordings: ~3sec. long.\n",
" - www.voxforge.org/downloads"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## We will use the SPEAR toolkit from Bob -- bob.bio.spear"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#bob.bio.spear documentation\n",
"#url = 'https://' + 'www.idiap.ch/software/bob/docs/bob/bob.bio.spear/stable/index.html'\n",
"#webbrowser.open(url)"
"## We will use the SPEAR toolkit from Bob -- bob.bio.spear\n",
"\n",
"- www.idiap.ch/software/bob/docs/bob/bob.bio.spear/stable/index.html"
]
},
{
...
...
@@ -81,7 +60,9 @@
" = \\frac{P(speech~|~ speaker\\_model)}{P(speech~|~UBM)}~\\gt~c_1 \\\\ \\\\\n",
" = ln(P(speech~|~ speaker\\_model))~-~ln(P(speech~|~UBM))~\\gt~c_2\n",
"\\end{split}\n",
"\\end{equation}"
"\\end{equation}\n",
"\n",
"### Here we use a GMM to represent the UBM, and separate GMMs for every enrolled client (speaker_model)."
]
},
{
...
...
@@ -90,7 +71,10 @@
"source": [
"## Training UBM for VoxForge dataset\n",
"\n",
""
"\n",
"\n",
"### Feature Extraction: 20 MFCCs per 20ms frame\n",
"\n"
]
},
{
...
...
@@ -99,11 +83,11 @@
"source": [
"## Enrollment Procedure\n",
"\n",
"Input:
x: speech_sample, u: UBM, i
: identity\n",
"Input:
_x_: speech_sample, _u_: UBM, _i_
: identity\n",
"\n",
"1. Compute F: array of MFCC features from
x
\n",
"1. Compute F: array of MFCC features from
_x_
\n",
"2. Apply MAP adaptation on UBM_GMM to generate Speaker_GMM\n",
"3. Store Speaker_GMM model for Speaker
-i
.\n",
"3. Store Speaker_GMM model for Speaker
_i_
.\n",
"\n",
"\n",
""
...
...
@@ -115,13 +99,13 @@
"source": [
"## Probe Algorithm:\n",
"\n",
"Input:
x: speech_sample, u: UBM, s: Speaker_
model of 'claimed identity'\n",
"Input:
_x_: probe speech sample, _u_: UBM, _s_: Speaker-
model of 'claimed identity'\n",
"\n",
"1. Define a threshold c2.\n",
"2. Compute F: array of MFCC features from x\n",
"1. Compute F: array of MFCC features from _x_\n",
"2. Compute A = ln(p(F|speaker_model)): log(probability that speech-sample was produced by speaker-model of claimed-identity)\n",
"3. Compute B = ln(p(F|UBM)): log(probability that speech was produced by some other person in the world)\n",
"4. Return score = (A - B)\n",
"5. If score > $c_2$, accept that _x_ comes from speaker _s_\n",
"\n",
"\n"
]
...
...
@@ -161,9 +145,10 @@
"import re, sys\n",
"import numpy\n",
"from pathlib import Path\n",
"from speaker_lib import load_scores\n",
"import speaker_lib as sl\n",
"#from speaker_lib import load_scores\n",
"\n",
"
my_file_nam
e = \"data/voxforge_denoised_16K_scores_dev.txt\"\n",
"
dev_score_fil
e = \"data/voxforge_denoised_16K_scores_dev.txt\"\n",
"#print(\"My file is %s\" %my_file_name)\n",
"#my_file = Path(my_file_name)\n",
"#assert my_file.is_file(), \"File %s does not exist. Quitting.\" %my_file_name\n",
...
...
@@ -172,13 +157,13 @@
"#x = dsf.readlines()\n",
"#dsf.close()\n",
"\n",
"dev_ze
i_scores, dev_gen_scores = load_scores(my_file_nam
e)\n",
"dev_ze
ro_effort_impostor_scores, dev_genuine_scores = sl.load_scores(dev_score_fil
e)\n",
"\n",
"dev_gen_scores = [float(line.split()[3]) for line in x if line.split()[0] == line.split()[1]]\n",
"dev_zei_scores = [float(line.split()[3]) for line in x if line.split()[0] != line.split()[1]]\n",
"
#
dev_gen_scores = [float(line.split()[3]) for line in x if line.split()[0] == line.split()[1]]\n",
"
#
dev_zei_scores = [float(line.split()[3]) for line in x if line.split()[0] != line.split()[1]]\n",
"\n",
"dev_genuine_scores = numpy.array(dev_gen_scores)\n",
"dev_zero_effort_impostor_scores = numpy.array(dev_zei_scores)\n"
"
#
dev_genuine_scores = numpy.array(dev_gen_scores)\n",
"
#
dev_zero_effort_impostor_scores = numpy.array(dev_zei_scores)\n"
]
},
{
...
...
@@ -214,7 +199,7 @@
"source": [
"## Plot the Detection Error Tradeoff (DET) Curve\n",
"\n",
"There are two kinds of detection error:\n",
"
###
There are two kinds of detection error:\n",
" - Genuine presentation classified as Impostor: False non-match rate (FNMR)\n",
" - Impostor presentation classified as Genuine: False Match Rate (FMR)\n",
"\n",
...
...
@@ -234,17 +219,24 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Given:\n",
"## Next exercise:\n",
"\n",
"### Given:\n",
"\n",
"1. Pre-computed features: one feature-set per presentation audio.\n",
"2. Pre-computed UBM: GMM with 64 components\n",
"3. Pre-trained threshold, determined using 'dev' set.\n",
"\n",
"1. Pre-computed UBM: GMM with 64 components\n",
"2. Pre-trained threshold, determined using 'dev' set.\n",
"3. Pre-computed features: one feature-set per presentation audio-sample.\n",
"4. Pre-enrolled speakers in 'eval' set.\n",
"\n",
"## To do:\n",
"1. For each enrolled speaker:\n",
" 2. For feature-set of every probe-sample in 'eval' set:\n",
" 3. Determine if sample came from enrolled speaker"
"### To do:\n",
"\n",
"\n",
"```\n",
"For each enrolled speaker:\n",
" For feature-set of every probe-sample in 'eval' set:\n",
" Determine if sample came from enrolled speaker\n",
"```"
]
},
{
...
...
notebooks/model.08_asv_expts.ipynb
View file @
04a30197
...
...
@@ -6,8 +6,9 @@
"metadata": {},
"outputs": [],
"source": [
"#HOUSEKEEPING: load some libraries we will need, and initialize some file-names\n",
"%matplotlib inline\n",
"\n",
"
from pathlib import Path
\n",
"import re\n",
"import sys\n",
"import os.path\n",
...
...
@@ -17,7 +18,10 @@
"import plots\n",
"\n",
"data_root = \"data/voxforge_eval/\"\n",
"UBM_GMM_FILE = os.path.join(data_root, \"Projector.hdf5\")\n",
"\n",
"UBM_GMM_FILE = os.path.join(data_root, \"denoised_16K_Projector.hdf5\")\n",
"assert Path(UBM_GMM_FILE).is_file(), \"File %s does not exist. Quitting.\" %UBM_GMM_FILE\n",
"\n",
"feature_root = os.path.join(data_root, \"extracted\")\n",
"model_root = os.path.join(data_root, \"models\")\n",
"f_extn = '.hdf5'\n",
...
...
@@ -48,6 +52,10 @@
"metadata": {},
"outputs": [],
"source": [
"# HOUSEKEEPING: Initialize a GMM variable.\n",
"# This will hold the UBM-GMM, and given a speaker-identity and a feature-set,\n",
"# it will compute the log-likelihood ratio (LLR) score for the feature-set.\n",
"#\n",
"my_gmm = bob.bio.gmm.algorithm.GMM(\n",
" number_of_gaussians = 64,\n",
" kmeans_training_iterations = 5,\n",
...
...
@@ -68,7 +76,16 @@
"# determine list of probe-subject names\n",
"names = [x.make_path().split('-')[0] for x in probe_files]\n",
"client_list = sorted(list(set(names)))\n",
"print(client_list)"
"print(\"Enrolled Clients: %d\\n\"%len(client_list), client_list)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compute Score-Thresholds for FMR=0.1%, FMR=0.01% From scores of 'Dev' set\n",
"\n",
"- repeated from previous notebook."
]
},
{
...
...
@@ -77,25 +94,13 @@
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import numpy\n",
"# determine score-thresholds from 'dev' set\n",
"# repeated from previous exercise\n",
"\n",
"from speaker_lib import load_scores\n",
"\n",
"my_file_name = \"data/voxforge_denoised_16K_scores_dev.txt\"\n",
"# we assume that the file is in the 4-column format devised for bob-score-files.\n",
"\n",
"#def load_scores(score_filename):\n",
"# my_file = Path(score_filename)\n",
"# assert my_file.is_file(), \"File %s does not exist. Quitting.\" %score_filename\n",
"# \n",
"# dsf = open(score_filename, \"r\")\n",
"# x = dsf.readlines()\n",
"# dsf.close()\n",
"# \n",
"# gen_scores = [float(line.split()[3]) for line in x if line.split()[0] == line.split()[1]]\n",
"# zei_scores = [float(line.split()[3]) for line in x if line.split()[0] != line.split()[1]]\n",
"# \n",
"# return numpy.array(zei_scores), numpy.array(gen_scores)\n",
"# The file is expected to be in the 4-column format devised for bob-score-files.\n",
" \n",
"dev16K_zei_scores, dev16K_gen_scores = load_scores(my_file_name)\n",
"#genuine_scores = numpy.array(dev16K_gen_scores)\n",
...
...
@@ -108,13 +113,26 @@
"print(\"FMR @ 0.01%:\" '{:6.2f}'.format(dev_fmr_001))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Probe Feature-Files\n",
"\n",
"### We have 10 enrolled clients\n",
" - For every client we have 30 'genuine' presentations, that is, 30 feature-files per client\n",
" - For any one client, we can use the presentations of the other clients as 'zero-effort-impostor' (ZEI) presentations\n",
" - So, for every client we have 270 (9 $\\times$ 30) ZEI presentations\n",
" - In total, we will have 300 genuine scores and 2,700 ZEI scores"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load probe-feature_files and organize them in a dictionary, grouped by client_name\n",
"#
HOUSEKEEPING:
load probe-feature_files and organize them in a dictionary, grouped by client_name\n",
"genuine_probes = {}\n",
"for n in client_list:\n",
" flist = [bob.io.base.load(x.make_path(feature_root, f_extn)) for x in probe_files if n in x.make_path()]\n",
...
...
@@ -128,6 +146,15 @@
" enrolled_models[n] = my_gmm.read_model(model_file) #construct dictionary by loading models from corresponding files."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Compute scores for genuine presentations"
]
},
{
"cell_type": "code",
"execution_count": null,
...
...
@@ -139,18 +166,25 @@
"print('Computing scores for genuine presentations:')\n",
"genuine_score_dict = {}\n",
"# generate dictionary of genuine-presentation scores for each subject\n",
"for
n in client_list
:\n",
" print('Claimed id
:'
, n)\n",
"for
i, n in enumerate(client_list)
:\n",
" print('Claimed id
%d:'%(i+1)
, n)\n",
" flist = genuine_probes[n]\n",
" score_list = [my_gmm.score(enrolled_models[n], my_gmm.project(k)) for k in flist]\n",
" genuine_score_dict[n] = score_list\n",
"\n",
" genuine_score_dict[n] = score_list\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('\\nComputing scores for ZEI presentations:')\n",
"zei_score_dict = {}\n",
"# generate dictionary of zei-presentation scores for each subject\n",
"for
n in client_list
:\n",
"for
i, n in enumerate(client_list)
:\n",
" claimed_model = enrolled_models[n]\n",
" print('Claimed id
:'
, n)\n",
" print('Claimed id
%d:'%(i+1)
, n)\n",
" claimed_zei_scores = None\n",
" zei_list = []\n",
" for z in client_list:\n",
...
...
@@ -173,7 +207,7 @@
"metadata": {},
"outputs": [],
"source": [
"# group the score-distributions, for ease of manipulation...\n",
"#
HOUSEKEEPING:
group the score-distributions, for ease of manipulation...\n",
"zero_effort_scores = []\n",
"genuine_scores = []\n",
"for n in client_list:\n",
...
...
@@ -200,8 +234,14 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"#plots.plot_hist(eval_zero_effort_scores, eval_genuine_scores, fmr_01=dev_fmr_01, fmr_001=dev_fmr_001)"
"plots.plot_hist(eval_zero_effort_scores, eval_genuine_scores, fmr_01=dev_fmr_01, fmr_001=dev_fmr_001)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot receiver operating characteristics (ROC) curve of recognition performance"
]
},
{
...
...
@@ -210,11 +250,18 @@
"metadata": {},
"outputs": [],
"source": [
"print(len(eval_zero_effort_scores))\n",
"print(len(eval_genuine_scores))\n",
"
#
print(len(eval_zero_effort_scores))\n",
"
#
print(len(eval_genuine_scores))\n",
"plots.plot_roc(eval_zero_effort_scores, eval_genuine_scores)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot DET curve for this experiment."
]
},
{
"cell_type": "code",
"execution_count": null,
...
...
@@ -224,13 +271,34 @@
"plots.plot_det(eval_zero_effort_scores, eval_genuine_scores)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## What happens if we record probe samples with 8K sampling rate (phone-quality) ?\n",
"\n",
"### Compare DET curves of recognition experiment with '16K' and '8K' data ...\n",
"\n",
"- pre-computed scores available for (same) speech-samples sampled at 8K samples per sec.\n",
"- training, and enrollment done with '16K' data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"add epc"
"score_file_list = ['voxforge_denoised_16K_scores_dev.txt','scores_vf_denoised_8k_dev.txt']\n",
"labels = ['16K', '8K']\n",
"plots.plot_multidet(score_file_list, labels, base_path=\"./data\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ... or the ROC curves of the two experiments"
]
},
{
...
...
@@ -239,9 +307,6 @@
"metadata": {},
"outputs": [],
"source": [
"score_file_list = ['scores_vf_denoised_8k_dev.txt', 'voxforge_denoised_16K_scores_dev.txt']\n",
"labels = ['8K', '16K']\n",
"plots.plot_multidet(score_file_list, labels, base_path=\"./data\")\n",
"plots.plot_multiroc(score_file_list, labels, base_path=\"./data\")"
]
},
...
...
@@ -252,6 +317,25 @@
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compare the Expected Performance Curves (EPC) of the two experiments"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dev_file_list = ['voxforge_denoised_16K_scores_dev.txt','scores_vf_denoised_8k_dev.txt']\n",
"eval_file_list = ['voxforge_denoised_16K_scores_dev.txt','scores_vf_denoised_8k_eval.txt']\n",
"expt_labels = ['16K', '8K']\n",
"plots.plot_multiepc(dev_file_list, eval_file_list, expt_labels, base_path=\"./data\")"
]
},
{
"cell_type": "code",
"execution_count": null,
...
...
notebooks/plots.py
View file @
04a30197
...
...
@@ -136,7 +136,7 @@ def plot_roc(zero_effort_scores, genuine_scores):
pyplot
.
title
(
"Receiver Operating Characteristic (ROC) curve"
)
pyplot
.
xlabel
(
"FMR"
)
pyplot
.
ylabel
(
"100 - FNMR"
)
#pyplot.ylim((0,1.
))
pyplot
.
ylim
((
0
,
101
))
pyplot
.
show
()
...
...
@@ -219,7 +219,7 @@ def plot_multiroc(file_names, labels, base_path="./scores"):
pyplot
.
show
()
def
plot_multidet
(
file_names
,
labels
,
base_path
=
"./
scores
"
):
def
plot_multidet
(
file_names
,
labels
,
base_path
=
"./
data
"
):
"""
Plot DET curves from several systems
...
...
@@ -249,6 +249,48 @@ def plot_multidet(file_names, labels, base_path="./scores"):
pyplot
.
show
()
def plot_multiepc(dev_filenames, eval_filenames, labels, base_path="./data"):
    """
    Plot Expected Performance Curves (EPC) from several systems

    **Parameters**

    dev_filenames: list of score-files of different algorithms on dev-set
    eval_filenames: list of score-files of different algorithms on eval-set
    labels: list of strings denoting the experiments being compared
    base_path: directory onto which every score-file name is joined

    Make sure that the order of the files in both lists corresponds to the
    list of expts.

    **Raises**

    AssertionError: if the three lists do not all have the same length.
    """
    # Validate the arguments first, so that a bad call fails before any
    # import, file access or figure creation takes place.
    assert (len(dev_filenames) == len(eval_filenames)), "File-name sets mismatch! Quitting."
    assert (len(labels) == len(dev_filenames)), "Label-set does not match file-name set! Quitting."

    from speaker_lib import load_scores

    pyplot.figure(figsize=(16, 8))

    # One curve per experiment. load_scores() returns the zero-effort-impostor
    # (zei) scores first and the genuine (gen) scores second.
    for label, dev_name, eval_name in zip(labels, dev_filenames, eval_filenames):
        dev_zei, dev_gen = load_scores(os.path.join(base_path, dev_name))
        eval_zei, eval_gen = load_scores(os.path.join(base_path, eval_name))
        bob.measure.plot.epc(dev_zei, dev_gen, eval_zei, eval_gen,
                             npoints=100, linestyle='-', label=label)

    pyplot.grid(True)
    pyplot.title("Expected Performance Curve (EPC)")
    pyplot.xlabel("alpha")
    # NOTE(review): bob.measure.plot.epc is expected to plot the error rate in
    # percent -- confirm against the installed bob.measure version.
    pyplot.ylabel("HTER (%)")
    pyplot.legend()
    pyplot.show()
def
plot_one_gaussian
(
dataset
,
model_mean
=
0.0
,
model_variance
=
1.0
):
fig
=
plt
.
figure
(
figsize
=
(
12
,
6
))
...
...
notebooks/speaker_lib.py
View file @
04a30197
...
...
@@ -12,7 +12,7 @@ from sklearn.mixture import GaussianMixture as GMM2
def
load_scores
(
score_filename
):
my_file
=
Path
(
score_filename
)
assert
my_file
.
is_file
(),
"File %s does not exist. Quitting."
%
score_filename
#print(my_file)
dsf
=
open
(
score_filename
,
"r"
)
x
=
dsf
.
readlines
()
dsf
.
close
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment