Commit 04a30197 authored by Sushil Bhattacharjee's avatar Sushil Bhattacharjee

updated functions; added epc plot

parent a6280e55
......@@ -14,38 +14,17 @@
"### With VoxForge Dataset: \n",
" - free, public dataset\n",
" - contains audio-recordings of 30 speakers (multiple recordings per speaker)\n",
" - short audio-recordings: ~3sec. long."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#To download data from VoxForge website uncomment and execute the following commands:\n",
"\n",
"#import webbrowser\n",
"#url = 'http://'+ 'www.voxforge.org/downloads'\n",
"#webbrowser.open(url)\n"
" - short audio-recordings: ~3sec. long.\n",
" - www.voxforge.org/downloads"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## We will use the SPEAR toolkit from Bob -- bob.bio.spear"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#bob.bio.spear documentation\n",
"#url = 'https://' + 'www.idiap.ch/software/bob/docs/bob/bob.bio.spear/stable/index.html'\n",
"#webbrowser.open(url)"
"## We will use the SPEAR toolkit from Bob -- bob.bio.spear\n",
"\n",
"- www.idiap.ch/software/bob/docs/bob/bob.bio.spear/stable/index.html"
]
},
{
......@@ -81,7 +60,9 @@
" = \\frac{P(speech~|~ speaker\\_model)}{P(speech~|~UBM)}~\\gt~c_1 \\\\ \\\\\n",
" = ln(P(speech~|~ speaker\\_model))~-~ln(P(speech~|~UBM))~\\gt~c_2\n",
"\\end{split}\n",
"\\end{equation}"
"\\end{equation}\n",
"\n",
"### Here we use a GMM to represent the UBM, and separate GMMs for every enrolled client (speaker_model)."
]
},
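{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only (not part of the tutorial pipeline): the decision rule above,\n",
"# written out with sklearn GaussianMixture models standing in for the UBM and for one\n",
"# speaker model. All data here is synthetic; the notebook itself uses bob.bio.gmm.\n",
"import numpy\n",
"from sklearn.mixture import GaussianMixture\n",
"\n",
"rng = numpy.random.RandomState(0)\n",
"world_features = rng.normal(0.0, 2.0, size=(1000, 20))    # stand-in for 'many speakers'\n",
"speaker_features = rng.normal(1.5, 1.0, size=(200, 20))   # stand-in for one enrolled speaker\n",
"\n",
"ubm = GaussianMixture(n_components=8, random_state=0).fit(world_features)\n",
"speaker_gmm = GaussianMixture(n_components=8, random_state=0).fit(speaker_features)\n",
"\n",
"probe = rng.normal(1.5, 1.0, size=(50, 20))               # a probe from the same speaker\n",
"llr = speaker_gmm.score(probe) - ubm.score(probe)         # mean log-likelihood ratio per frame\n",
"c2 = 0.0                                                  # toy threshold\n",
"print('LLR = %.3f -> %s' % (llr, 'accept' if llr > c2 else 'reject'))"
]
},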
{
......@@ -90,7 +71,10 @@
"source": [
"## Training UBM for VoxForge dataset\n",
"\n",
"![](figures/asv_training.png)"
"![](figures/asv_training.png)\n",
"\n",
"### Feature Extraction: 20 MFCCs per 20ms frame\n",
"\n"
]
},
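{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only: what '20 MFCCs per 20ms frame' means in code.\n",
"# The pre-computed features used later come from the bob.bio.spear extraction chain;\n",
"# librosa is used here purely for illustration (an assumption -- it is not part of this tutorial).\n",
"import numpy\n",
"import librosa\n",
"\n",
"sample_rate = 16000                              # VoxForge recordings, resampled to 16 kHz\n",
"signal = numpy.random.randn(3 * sample_rate)     # stand-in for a ~3 sec. recording\n",
"\n",
"frame_length = int(0.020 * sample_rate)          # 20 ms analysis window\n",
"hop_length = int(0.010 * sample_rate)            # 10 ms frame shift (assumed)\n",
"\n",
"mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=20,\n",
"                            n_fft=frame_length, hop_length=hop_length)\n",
"print('MFCC array shape (n_ceps x n_frames):', mfcc.shape)"
]
},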
{
......@@ -99,11 +83,11 @@
"source": [
"## Enrollment Procedure\n",
"\n",
"Input: x: speech_sample, u: UBM, i: identity\n",
"Input: _x_: speech_sample, _u_: UBM, _i_: identity\n",
"\n",
"1. Compute F: array of MFCC features from x\n",
"1. Compute F: array of MFCC features from _x_\n",
"2. Apply MAP adaptation on UBM_GMM to generate Speaker_GMM\n",
"3. Store Speaker_GMM model for Speaker-i.\n",
"3. Store Speaker_GMM model for Speaker _i_.\n",
"\n",
"\n",
"![](figures/asv_enrollment.png)"
......@@ -115,13 +99,13 @@
"source": [
"## Probe Algorithm:\n",
"\n",
"Input: x: speech_sample, u: UBM, s: Speaker_model of 'claimed identity'\n",
"Input: _x_: probe speech sample, _u_: UBM, _s_: Speaker-model of 'claimed identity'\n",
"\n",
"1. Define a threshold c2.\n",
"2. Compute F: array of MFCC features from x\n",
"1. Compute F: array of MFCC features from x\n",
"2. Compute A = ln(p(F|speaker_model)): log(probability that speech-sample was produced by speaker-model of claimed-identity)\n",
"3. Compute B = ln(p(F|UBM)): log(probability that speech was produced by some other person in the world)\n",
"4. Return score = (A - B)\n",
"5. If score > $c_2$, accept that _x_ comes from speaker _s_\n",
"\n",
"![](figures/asv_probing.png)\n"
]
......@@ -161,9 +145,10 @@
"import re, sys\n",
"import numpy\n",
"from pathlib import Path\n",
"from speaker_lib import load_scores\n",
"import speaker_lib as sl\n",
"#from speaker_lib import load_scores\n",
"\n",
"my_file_name = \"data/voxforge_denoised_16K_scores_dev.txt\"\n",
"dev_score_file = \"data/voxforge_denoised_16K_scores_dev.txt\"\n",
"#print(\"My file is %s\" %my_file_name)\n",
"#my_file = Path(my_file_name)\n",
"#assert my_file.is_file(), \"File %s does not exist. Quitting.\" %my_file_name\n",
......@@ -172,13 +157,13 @@
"#x = dsf.readlines()\n",
"#dsf.close()\n",
"\n",
"dev_zei_scores, dev_gen_scores = load_scores(my_file_name)\n",
"dev_zero_effort_impostor_scores, dev_genuine_scores = sl.load_scores(dev_score_file)\n",
"\n",
"dev_gen_scores = [float(line.split()[3]) for line in x if line.split()[0] == line.split()[1]]\n",
"dev_zei_scores = [float(line.split()[3]) for line in x if line.split()[0] != line.split()[1]]\n",
"#dev_gen_scores = [float(line.split()[3]) for line in x if line.split()[0] == line.split()[1]]\n",
"#dev_zei_scores = [float(line.split()[3]) for line in x if line.split()[0] != line.split()[1]]\n",
"\n",
"dev_genuine_scores = numpy.array(dev_gen_scores)\n",
"dev_zero_effort_impostor_scores = numpy.array(dev_zei_scores)\n"
"#dev_genuine_scores = numpy.array(dev_gen_scores)\n",
"#dev_zero_effort_impostor_scores = numpy.array(dev_zei_scores)\n"
]
},
{
......@@ -214,7 +199,7 @@
"source": [
"## Plot the Detection Error Tradeoff (DET) Curve\n",
"\n",
"There are two kinds of detection error:\n",
"### There are two kinds of detection error:\n",
" - Genuine presentation classified as Impostor: False non-match rate (FNMR)\n",
" - Impostor presentation classified as Genuine: False Match Rate (FMR)\n",
"\n",
......@@ -234,17 +219,24 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Given:\n",
"## Next exercise:\n",
"\n",
"### Given:\n",
"\n",
"1. Pre-computed features: one feature-set per presentation audio.\n",
"2. Pre-computed UBM: GMM with 64 components\n",
"3. Pre-trained threshold, determined using 'dev' set.\n",
"\n",
"1. Pre-computed UBM: GMM with 64 components\n",
"2. Pre-trained threshold, determined using 'dev' set.\n",
"3. Pre-computed features: one feature-set per presentation audio-sample.\n",
"4. Pre-enrolled speakers in 'eval' set.\n",
"\n",
"## To do:\n",
"1. For each enrolled speaker:\n",
" 2. For feature-set of every probe-sample in 'eval' set:\n",
" 3. Determine if sample came from enrolled speaker"
"### To do:\n",
"\n",
"\n",
"```\n",
"For each enrolled speaker:\n",
" For feature-set of every probe-sample in 'eval' set:\n",
" Determine if sample came from enrolled speaker\n",
"```"
]
},
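{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative, self-contained sketch of the 'To do' loop above, using synthetic data and\n",
"# sklearn GaussianMixture models in place of the bob.bio.gmm objects built in the next\n",
"# notebook. The speaker names, data and threshold here are all hypothetical.\n",
"import numpy\n",
"from sklearn.mixture import GaussianMixture\n",
"\n",
"rng = numpy.random.RandomState(0)\n",
"speaker_means = {'spk_a': -2.0, 'spk_b': 0.0, 'spk_c': 2.0}   # toy 'enrolled speakers'\n",
"\n",
"# 'enroll' one GMM per speaker, and keep one probe feature-set per speaker\n",
"enrolled_models = {}\n",
"probes = {}\n",
"for name, mu in speaker_means.items():\n",
"    enrolled_models[name] = GaussianMixture(n_components=2, random_state=0).fit(\n",
"        rng.normal(mu, 1.0, size=(300, 20)))\n",
"    probes[name] = rng.normal(mu, 1.0, size=(50, 20))\n",
"\n",
"ubm = GaussianMixture(n_components=4, random_state=0).fit(\n",
"    rng.normal(0.0, 2.0, size=(900, 20)))   # toy UBM trained on all speakers\n",
"threshold = 0.0                             # in the exercise this comes from the 'dev' set\n",
"\n",
"for claimed in enrolled_models:              # for each enrolled speaker ...\n",
"    for probe_id, feats in probes.items():   # ... score every probe feature-set\n",
"        score = enrolled_models[claimed].score(feats) - ubm.score(feats)\n",
"        decision = 'accept' if score > threshold else 'reject'\n",
"        print('claim=%s probe=%s score=%7.2f -> %s' % (claimed, probe_id, score, decision))"
]
},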
{
......
......@@ -6,8 +6,9 @@
"metadata": {},
"outputs": [],
"source": [
"#HOUSEKEEPING: load some libraries we will need, and initialize some file-names\n",
"%matplotlib inline\n",
"\n",
"from pathlib import Path\n",
"import re\n",
"import sys\n",
"import os.path\n",
......@@ -17,7 +18,10 @@
"import plots\n",
"\n",
"data_root = \"data/voxforge_eval/\"\n",
"UBM_GMM_FILE = os.path.join(data_root, \"Projector.hdf5\")\n",
"\n",
"UBM_GMM_FILE = os.path.join(data_root, \"denoised_16K_Projector.hdf5\")\n",
"assert Path(UBM_GMM_FILE).is_file(), \"File %s does not exist. Quitting.\" %UBM_GMM_FILE\n",
"\n",
"feature_root = os.path.join(data_root, \"extracted\")\n",
"model_root = os.path.join(data_root, \"models\")\n",
"f_extn = '.hdf5'\n",
......@@ -48,6 +52,10 @@
"metadata": {},
"outputs": [],
"source": [
"# HOUSEKEEPING: Initialize a GMM variable.\n",
"# This will hold the UBM-GMM, and given a speaker-identity and a feature-set,\n",
"# it will compute the log-likelihood ratio (LLR) score for the feature-set.\n",
"#\n",
"my_gmm = bob.bio.gmm.algorithm.GMM(\n",
" number_of_gaussians = 64,\n",
" kmeans_training_iterations = 5,\n",
......@@ -68,7 +76,16 @@
"# determine list of probe-subject names\n",
"names = [x.make_path().split('-')[0] for x in probe_files]\n",
"client_list = sorted(list(set(names)))\n",
"print(client_list)"
"print(\"Enrolled Clients: %d\\n\"%len(client_list), client_list)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compute Score-Thresholds for FMR=0.1%, FMR=0.01% From scores of 'Dev' set\n",
"\n",
"- repeated from previous notebook."
]
},
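{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: one way to obtain the FMR=0.1% and FMR=0.01% thresholds from the 'dev'\n",
"# scores with bob.measure. The notebook's own cell below may compute them with a\n",
"# different helper; this is only meant to make the idea explicit.\n",
"import bob.measure\n",
"from speaker_lib import load_scores\n",
"\n",
"dev_zei, dev_gen = load_scores('data/voxforge_denoised_16K_scores_dev.txt')\n",
"\n",
"thr_01 = bob.measure.far_threshold(dev_zei, dev_gen, 0.001)     # FMR = 0.1%\n",
"thr_001 = bob.measure.far_threshold(dev_zei, dev_gen, 0.0001)   # FMR = 0.01%\n",
"\n",
"# sanity-check: FMR and FNMR actually obtained on the 'dev' set at these thresholds\n",
"for name, thr in [('FMR=0.1%', thr_01), ('FMR=0.01%', thr_001)]:\n",
"    fmr, fnmr = bob.measure.farfrr(dev_zei, dev_gen, thr)\n",
"    print('%s: threshold=%6.2f  FMR=%.4f  FNMR=%.4f' % (name, thr, fmr, fnmr))"
]
},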
{
......@@ -77,25 +94,13 @@
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import numpy\n",
"# determine score-thresholds from 'dev' set\n",
"# repeated from previous exercise\n",
"\n",
"from speaker_lib import load_scores\n",
"\n",
"my_file_name = \"data/voxforge_denoised_16K_scores_dev.txt\"\n",
"# we assume that the file is in the 4-column format devised for bob-score-files.\n",
"\n",
"#def load_scores(score_filename):\n",
"# my_file = Path(score_filename)\n",
"# assert my_file.is_file(), \"File %s does not exist. Quitting.\" %score_filename\n",
"# \n",
"# dsf = open(score_filename, \"r\")\n",
"# x = dsf.readlines()\n",
"# dsf.close()\n",
"# \n",
"# gen_scores = [float(line.split()[3]) for line in x if line.split()[0] == line.split()[1]]\n",
"# zei_scores = [float(line.split()[3]) for line in x if line.split()[0] != line.split()[1]]\n",
"# \n",
"# return numpy.array(zei_scores), numpy.array(gen_scores)\n",
"# The file is expected to be in the 4-column format devised for bob-score-files.\n",
" \n",
"dev16K_zei_scores, dev16K_gen_scores = load_scores(my_file_name)\n",
"#genuine_scores = numpy.array(dev16K_gen_scores)\n",
......@@ -108,13 +113,26 @@
"print(\"FMR @ 0.01%:\" '{:6.2f}'.format(dev_fmr_001))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Probe Feature-Files\n",
"\n",
"### We have 10 enrolled clients\n",
" - For every client we have 30 'genuine' presentations, that is, 30 feature-files per client\n",
" - For any one client, we can use the presentations of the other clients as 'zero-effort-impostor' (ZEI) presentations\n",
" - So, for every client we have 270 (9 $\\times$ 30) ZEI presentations\n",
" - In total, we will have 300 genuine scores and 2,700 ZEI scores"
]
},
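{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick arithmetic check of the counts quoted above (pure illustration).\n",
"n_clients = 10\n",
"probes_per_client = 30\n",
"genuine_total = n_clients * probes_per_client          # 10 x 30 = 300\n",
"zei_per_client = (n_clients - 1) * probes_per_client   # 9 x 30 = 270\n",
"zei_total = n_clients * zei_per_client                 # 10 x 270 = 2700\n",
"print('genuine scores: %d, ZEI scores: %d' % (genuine_total, zei_total))"
]
},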
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load probe-feature_files and organize them in a dictionary, grouped by client_name\n",
"#HOUSEKEEPING: load probe-feature_files and organize them in a dictionary, grouped by client_name\n",
"genuine_probes = {}\n",
"for n in client_list:\n",
" flist = [bob.io.base.load(x.make_path(feature_root, f_extn)) for x in probe_files if n in x.make_path()]\n",
......@@ -128,6 +146,15 @@
" enrolled_models[n] = my_gmm.read_model(model_file) #construct dictionary by loading models from corresponding files."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Compute scores for genuine presentations"
]
},
{
"cell_type": "code",
"execution_count": null,
......@@ -139,18 +166,25 @@
"print('Computing scores for genuine presentations:')\n",
"genuine_score_dict = {}\n",
"# generate dictionary of genuine-presentation scores for each subject\n",
"for n in client_list:\n",
" print('Claimed id:', n)\n",
"for i, n in enumerate(client_list):\n",
" print('Claimed id %d:'%(i+1), n)\n",
" flist = genuine_probes[n]\n",
" score_list = [my_gmm.score(enrolled_models[n], my_gmm.project(k)) for k in flist]\n",
" genuine_score_dict[n] = score_list\n",
"\n",
" genuine_score_dict[n] = score_list\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('\\nComputing scores for ZEI presentations:')\n",
"zei_score_dict = {}\n",
"# generate dictionary of zei-presentation scores for each subject\n",
"for n in client_list:\n",
"for i, n in enumerate(client_list):\n",
" claimed_model = enrolled_models[n]\n",
" print('Claimed id:', n)\n",
" print('Claimed id %d:'%(i+1), n)\n",
" claimed_zei_scores = None\n",
" zei_list = []\n",
" for z in client_list:\n",
......@@ -173,7 +207,7 @@
"metadata": {},
"outputs": [],
"source": [
"# group the score-distributions, for ease of manipulation...\n",
"# HOUSEKEEPING: group the score-distributions, for ease of manipulation...\n",
"zero_effort_scores = []\n",
"genuine_scores = []\n",
"for n in client_list:\n",
......@@ -200,8 +234,14 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"#plots.plot_hist(eval_zero_effort_scores, eval_genuine_scores, fmr_01=dev_fmr_01, fmr_001=dev_fmr_001)"
"plots.plot_hist(eval_zero_effort_scores, eval_genuine_scores, fmr_01=dev_fmr_01, fmr_001=dev_fmr_001)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot receiver operating characteristics (ROC) curve of recognition performance"
]
},
{
......@@ -210,11 +250,18 @@
"metadata": {},
"outputs": [],
"source": [
"print(len(eval_zero_effort_scores))\n",
"print(len(eval_genuine_scores))\n",
"#print(len(eval_zero_effort_scores))\n",
"#print(len(eval_genuine_scores))\n",
"plots.plot_roc(eval_zero_effort_scores, eval_genuine_scores)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot DET curve for this experiment."
]
},
{
"cell_type": "code",
"execution_count": null,
......@@ -224,13 +271,34 @@
"plots.plot_det(eval_zero_effort_scores, eval_genuine_scores)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## What happens if we record probe samples with 8K sampling rate (phone-quality) ?\n",
"\n",
"### Compare DET curves of recognition experiment with '16K' and '8K' data ...\n",
"\n",
"- pre-computed scores available for (same) speech-samples sampled at 8K samples per sec.\n",
"- training, and enrollment done with '16K' data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"add epc"
"score_file_list = ['voxforge_denoised_16K_scores_dev.txt','scores_vf_denoised_8k_dev.txt']\n",
"labels = ['16K', '8K']\n",
"plots.plot_multidet(score_file_list, labels, base_path=\"./data\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ... or the ROC curves of the two experiments"
]
},
{
......@@ -239,9 +307,6 @@
"metadata": {},
"outputs": [],
"source": [
"score_file_list = ['scores_vf_denoised_8k_dev.txt', 'voxforge_denoised_16K_scores_dev.txt']\n",
"labels = ['8K', '16K']\n",
"plots.plot_multidet(score_file_list, labels, base_path=\"./data\")\n",
"plots.plot_multiroc(score_file_list, labels, base_path=\"./data\")"
]
},
......@@ -252,6 +317,25 @@
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## add epc"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dev_file_list = ['voxforge_denoised_16K_scores_dev.txt','scores_vf_denoised_8k_dev.txt']\n",
"eval_file_list = ['voxforge_denoised_16K_scores_dev.txt','scores_vf_denoised_8k_eval.txt']\n",
"expt_labels = ['16K', '8K']\n",
"plots.plot_multiepc(dev_file_list, eval_file_list, expt_labels, base_path=\"./data\")"
]
},
{
"cell_type": "code",
"execution_count": null,
......
......@@ -136,7 +136,7 @@ def plot_roc(zero_effort_scores, genuine_scores):
pyplot.title("Receiver Operating Characteristic (ROC) curve")
pyplot.xlabel("FMR")
pyplot.ylabel("100 - FNMR")
#pyplot.ylim((0,1.))
pyplot.ylim((0,101))
pyplot.show()
......@@ -219,7 +219,7 @@ def plot_multiroc(file_names, labels, base_path="./scores"):
pyplot.show()
def plot_multidet(file_names, labels, base_path="./scores"):
def plot_multidet(file_names, labels, base_path="./data"):
"""
Plot DET curves from several systems
......@@ -249,6 +249,48 @@ def plot_multidet(file_names, labels, base_path="./scores"):
pyplot.show()
def plot_multiepc(dev_filenames, eval_filenames, labels, base_path="./data"):
"""
Plot EPC curves from several systems
**Parameters**
dev_filenames: list of score-files of different algorithms on dev-set
eval_filenames: list of score-files of different algorithms on eval-set
labels: list of strings denoting the experiments being compared
Make sure that the order of the files in both lists corresponds to the list of expts.
"""
from speaker_lib import load_scores
# make sure 'dev_filenames' and 'eval_filenames' have same no. of items
assert (len(dev_filenames) == len(eval_filenames)), "File-name sets mismatch! Quitting."
assert (len(labels) == len(dev_filenames)), "Label-set does not match file-name set! Quitting."
score_dict={}
# EPC curve
pyplot.figure(figsize=(16,8))
# score-var names: 'd'for dev, 'e' for eval; 'z' for zei, 'g' for genuine.
for i, l in enumerate(labels):
d_z, d_g = load_scores(os.path.join(base_path, dev_filenames[i]))
e_z, e_g = load_scores(os.path.join(base_path, eval_filenames[i]))
#score_dict[label[i]] = [d_z, d_g, e_z, e_g]
bob.measure.plot.epc(d_z, d_g, e_z, e_g, npoints=100, linestyle='-', label=l)
pyplot.grid(True)
pyplot.title("(EPC)")
pyplot.xlabel("alfa")
pyplot.ylabel(" ")
pyplot.legend()
pyplot.show()
def plot_one_gaussian(dataset, model_mean=0.0, model_variance=1.0):
fig = plt.figure(figsize=(12, 6))
......
......@@ -12,7 +12,7 @@ from sklearn.mixture import GaussianMixture as GMM2
def load_scores(score_filename):
my_file = Path(score_filename)
assert my_file.is_file(), "File %s does not exist. Quitting." %score_filename
#print(my_file)
dsf = open(score_filename, "r")
x = dsf.readlines()
dsf.close()
......