Commit 79a9141d authored by Sushil Bhattacharjee's avatar Sushil Bhattacharjee

gmm exercises

parent 12b4a7e5
Pipeline #24247 failed with stage
in 1 minute and 8 seconds
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# start with k-means, and explain the difference between k-means and GMM\n",
"import numpy as np\n",
"#from sklearn.mixture import GMM\n",
"from sklearn.mixture import GaussianMixture as GMM2\n",
"from matplotlib import pyplot as plt\n",
"import warnings\n",
"import time\n",
"\n",
"warnings.simplefilter(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#define function to plot k-means results\n",
"from sklearn.cluster import KMeans\n",
"from scipy.spatial.distance import cdist\n",
"\n",
"def plot_kmeans(kmeans, X, n_clusters=4, rseed=0, axs=None):\n",
" labels = kmeans.fit_predict(X)\n",
"\n",
" # plot the input data\n",
" ax = axs or plt.gca()\n",
" ax.axis('equal')\n",
" ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2)\n",
"\n",
" # plot the representation of the KMeans model\n",
" centers = kmeans.cluster_centers_\n",
" radii = [cdist(X[labels == i], [center]).max() for i, center in enumerate(centers)]\n",
" for c, r in zip(centers, radii):\n",
" ax.add_patch(plt.Circle(c, r, fc='#CCCCCC', lw=3, alpha=0.5, zorder=1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Generate some data\n",
"from sklearn.datasets.samples_generator import make_blobs\n",
"X, y_true = make_blobs(n_samples=500, centers=4, cluster_std=0.60, random_state=0)\n",
"X = X[:, ::-1] # flip axes for better plotting"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot the data with K Means Labels\n",
"from sklearn.cluster import KMeans\n",
"kmeans = KMeans(4, random_state=0, max_iter=1)\n",
"labels = kmeans.fit(X).predict(X)\n",
"\n",
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"\n",
"ax = fig.add_subplot(111)\n",
"ax.scatter(X[:,0], X[:,1])\n",
"ax.axis('equal')\n",
"#ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis');\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"kmeans = KMeans(n_clusters=4, random_state=0, max_iter=1)\n",
"plot_kmeans(kmeans, X, n_clusters=4, axs=ax)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rng = np.random.RandomState(8)\n",
"X_stretched = np.dot(X, rng.randn(2, 2))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"ax.axis('equal')\n",
"ax.scatter(X_stretched[:, 0], X_stretched[:, 1])#, c=labels, s=40, cmap='viridis');\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"\n",
"kmeans = KMeans(n_clusters=4, random_state=0)\n",
"plot_kmeans(kmeans, X_stretched)\n",
"#ax.set_ylim(-4,4)\n",
"#ax.set_xlim(0,10)\n",
"#plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#try clustering with gmm\n",
"#from sklearn.mixture import GaussianMixture as GMM\n",
"\n",
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"\n",
"gmm = GMM2(n_components=4).fit(X)\n",
"labels = gmm.predict(X)\n",
"plt.scatter(X[:, 0], X[:, 1]);\n",
"#plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#define some plotting functions\n",
"from matplotlib.patches import Ellipse\n",
"\n",
"def draw_ellipse(position, covariance, ax=None, **kwargs):\n",
" \"\"\"Draw an ellipse with a given position and covariance\"\"\"\n",
" ax = ax or plt.gca()\n",
" \n",
" # Convert covariance to principal axes\n",
" if covariance.shape == (2, 2):\n",
" U, s, Vt = np.linalg.svd(covariance)\n",
" angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))\n",
" width, height = 2 * np.sqrt(s)\n",
" else:\n",
" angle = 0\n",
" width, height = 2 * np.sqrt(covariance)\n",
" \n",
" # Draw the Ellipse\n",
" for nsig in range(1, 4):\n",
" ax.add_patch(Ellipse(position, nsig * width, nsig * height,\n",
" angle, **kwargs))\n",
" \n",
"def plot_gmm(gmm, X, label=True, ax=None):\n",
" ax = ax or plt.gca()\n",
" labels = gmm.fit(X).predict(X)\n",
" if label:\n",
" ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2)\n",
" else:\n",
" ax.scatter(X[:, 0], X[:, 1], s=40, zorder=2)\n",
" ax.axis('equal')\n",
" \n",
" w_factor = 0.2 / gmm.weights_.max()\n",
" gmm_pars = zip(gmm.means_, gmm.covariances_, gmm.weights_)\n",
" for pos, covar, w in gmm_pars:\n",
" draw_ellipse(pos, covar, alpha=w * w_factor)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#plot GMM results\n",
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"\n",
"gmm = GMM2(n_components=4, covariance_type='full', random_state=42)\n",
"plot_gmm(gmm, X)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<table>\n",
" <tr>\n",
" <td>\n",
" Full Covariance Matrix\n",
"\\begin{bmatrix} \n",
" \\sigma_{11} & \\sigma_{12} \\\\\n",
" \\sigma_{21} & \\sigma_{22} \\\\\n",
"\\end{bmatrix}\n",
" </td>\n",
" <td>\n",
" ~~~\n",
" </td>\n",
" <td>\n",
" Diagonal Covariance Matrix\n",
"\\begin{bmatrix}\n",
" \\huge\n",
" \\sigma_{11} & 0 \\\\\n",
" 0 & \\sigma_{22}\n",
"\\end{bmatrix}\n",
" </td>\n",
" </tr>\n",
"</table>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rng = np.random.RandomState(10)\n",
"X_stretched = np.dot(X, rng.randn(2, 2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#GMM approximation of X_stretched\n",
"# change the 'covariance_type' ('diag' or 'full') to see the difference.\n",
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"\n",
"gmm2 = GMM2(n_components=4, covariance_type='full', random_state=42)\n",
"plot_gmm(gmm2, X_stretched)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Modeling complex data with GMMs\n",
"from sklearn.datasets import make_moons\n",
"Xmoon, ymoon = make_moons(200, noise=0.05, random_state=0)\n",
"\n",
"fig = plt.figure(figsize=(12, 7))\n",
"#fig.subplots_adjust(left=0.12, right=0.97, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"ax.scatter(Xmoon[:,0], Xmoon[:,1])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"\n",
"#try different number of n_components\n",
"my_gmm2 = GMM2(n_components=2, covariance_type='full', random_state=0)\n",
"plot_gmm(my_gmm2, Xmoon)\n",
"print(\"Log Likelikelihood:???\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# start with k-means, and explain the difference between k-means and GMM\n",
"import numpy as np\n",
"#from sklearn.mixture import GMM\n",
"from sklearn.mixture import GaussianMixture as GMM2\n",
"from matplotlib import pyplot as plt\n",
"import warnings\n",
"import time\n",
"\n",
"warnings.simplefilter(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Generate some data\n",
"from sklearn.datasets.samples_generator import make_blobs\n",
"X, y_true = make_blobs(n_samples=500, centers=4, cluster_std=0.60, random_state=0)\n",
"X = X[:, ::-1] # flip axes for better plotting"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#define some plotting functions\n",
"from matplotlib.patches import Ellipse\n",
"\n",
"def draw_ellipse(position, covariance, ax=None, **kwargs):\n",
" \"\"\"Draw an ellipse with a given position and covariance\"\"\"\n",
" ax = ax or plt.gca()\n",
" \n",
" # Convert covariance to principal axes\n",
" if covariance.shape == (2, 2):\n",
" U, s, Vt = np.linalg.svd(covariance)\n",
" angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))\n",
" width, height = 2 * np.sqrt(s)\n",
" else:\n",
" angle = 0\n",
" width, height = 2 * np.sqrt(covariance)\n",
" \n",
" # Draw the Ellipse\n",
" for nsig in range(1, 4):\n",
" ax.add_patch(Ellipse(position, nsig * width, nsig * height,\n",
" angle, **kwargs))\n",
"\n",
"def plot_intr_gmm(gmm_pars, X, weights, labels=True, ax=None):\n",
" ax = ax or plt.gca()\n",
" #labels = gmm.fit(X).predict(X)\n",
" ax.scatter(X[:, 0], X[:, 1], s=40, zorder=2)\n",
"# if label:\n",
"# ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2)\n",
"# else:\n",
"# ax.scatter(X[:, 0], X[:, 1], s=40, zorder=2)\n",
" ax.axis('equal')\n",
" \n",
" w_factor = 0.2 / weights.max()\n",
" for pos, covar, w in gmm_pars: #zip(gmm.means_, gmm.covariances_, gmm.weights_):\n",
" draw_ellipse(pos, covar, alpha=w * w_factor)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#GMM training in slow-motion\n",
"from scipy.stats import multivariate_normal as mvn\n",
"from numpy.core.umath_tests import matrix_multiply as mm\n",
"\n",
"def em_gmm_vect(xs, pis, mus, sigmas, plt_axis, tol=0.01, max_iter=100):\n",
"\n",
" n, p = xs.shape\n",
" k = len(pis)\n",
"\n",
" ll_old = 0\n",
" for i in range(max_iter):\n",
" exp_A = []\n",
" exp_B = []\n",
" ll_new = 0\n",
"\n",
" # E-step\n",
" ws = np.zeros((k, n))\n",
" for j in range(k):\n",
" ws[j, :] = pis[j] * mvn(mus[j], sigmas[j]).pdf(xs)\n",
" ws /= ws.sum(0)\n",
"\n",
" # M-step\n",
" pis = ws.sum(axis=1)\n",
" pis /= n\n",
"\n",
" mus = np.dot(ws, xs)\n",
" mus /= ws.sum(1)[:, None]\n",
"\n",
" sigmas = np.zeros((k, p, p))\n",
" for j in range(k):\n",
" ys = xs - mus[j, :]\n",
" sigmas[j] = (ws[j,:,None,None] * mm(ys[:,:,None], ys[:,None,:])).sum(axis=0)\n",
" sigmas /= ws.sum(axis=1)[:,None,None]\n",
"\n",
" # update complete log likelihoood\n",
" ll_new = 0\n",
" for pi, mu, sigma in zip(pis, mus, sigmas):\n",
" ll_new += pi*mvn(mu, sigma).pdf(xs)\n",
" ll_new = np.log(ll_new).sum()\n",
"\n",
" if np.abs(ll_new - ll_old) < tol:\n",
" break\n",
" ll_old = ll_new\n",
"\n",
" print(sigmas.shape)\n",
" covars = sigmas\n",
" gmm_pars = zip(mus, covars, ws)\n",
" plot_intr_gmm(gmm_pars, X, ws, labels=True, ax=plt_axis)\n",
" time.sleep(1)\n",
" assert 0>1, 'stop'\n",
" \n",
" \n",
" return ll_new, pis, mus, sigmas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#initialize params first\n",
"gmm3 = GMM2(n_components=4, covariance_type='diag', random_state=42)\n",
"gmm3.fit(X)\n",
"print(type(gmm3.weights_))\n",
"means_init = np.random.random((4,2))\n",
"mus = means_init\n",
"sigmas = means_init\n",
"pis = np.random.random(4)\n",
"sum_pis = np.sum(pis)\n",
"pis = pis/sum_pis\n",
"fig = plt.figure(figsize=(12, 7))\n",
"fig.subplots_adjust(left=0.12, right=0.97, bottom=0.21, top=0.9, wspace=0.5)\n",
"ax = fig.add_subplot(111)\n",
"\n",
"\n",
"em_gmm_vect(X, pis, mus, sigmas, tol=0.01, max_iter=1, plt_axis=ax)\n",
"assert 0>1, 'stop'"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Speaker Recognition Experiment with Bob ML Toolkit"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### With VoxForge Dataset: \n",
" - free, public dataset\n",
" - contains audio-recordings of 30 speakers (multiple recordings per speaker)\n",
" - short audio-recordings: ~3sec. long."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#To download data from VoxForge website uncomment and execute the following commands:\n",
"\n",
"#import webbrowser\n",
"#url = 'http://'+ 'www.voxforge.org/downloads'\n",
"#webbrowser.open(url)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## We will use the SPEAR toolkit from Bob -- bob.bio.spear"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#bob.bio.spear documentation\n",
"#url = 'https://' + 'www.idiap.ch/software/bob/docs/bob/bob.bio.spear/stable/index.html'\n",
"#webbrowser.open(url)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## VoxForge Experiment Protocol \n",
"\n",
"The dataset is split into three parts.\n",
"\n",
"- The 'world' set is used to train the Universal Background Model (UBM).\n",
"- The 'dev' set is used to adjust hyperparameters.\n",
"- The 'eval' set is used to compute performance statistics.\n",
"\n",
"\n",
"![](figures/voxforge_protocol2.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hypothesis Testing:\n",
"\n",
"### $H_0$ : Voice matches enrolled identity\n",
"### $H_1$ : Voice comes from some other person\n",
"\n",
"## Likelhood-Ratio Test: Accept $H_0$ if\n",
"\\begin{equation}\n",
"\\large\n",
"\\begin{split}\n",
"\\Lambda(speech) = \\frac{\\mathcal{L}(person = claimed~|~speech)}{\\mathcal{L}(person \\neq claimed~|~speech)}~\\gt~C\\\\ \\\\\n",
" = \\frac{P(speech~|~ speaker\\_model)}{P(speech~|~UBM)}~\\gt~c_1 \\\\ \\\\\n",
" = ln(P(speech~|~ speaker\\_model))~-~ln(P(speech~|~UBM))~\\gt~c_2\n",
"\\end{split}\n",
"\\end{equation}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Algorithm:\n",
"\n",
"Input: x: speech_sample, u:UBM, s:speaker_model\n",
"\n",
"1. Define a threshold c2.\n",
"2. Compute F: array of MFCC features from x\n",
"2. Compute A = ln(p(F|speaker_model)): log(probability that speech-sample was produced by speaker-model of claimed-identity)\n",
"3. Compute B = ln(p(F|UBM)): log(probability that speech was produced by some other person in the world)\n",
"4. Return score = (A - B)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training UBM for VoxForge dataset\n",
"\n",
"![](figures/asv_training.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Given:\n",
"\n",
"1. Pre-computed features: one feature-set per presentation audio.\n",
"2. Pre-computed UBM: GMM with 64 components\n",
"3. Pre-trained threshold, determined using 'dev' set.\n",
"4. Pre-enrolled speakers in 'eval' set.\n",
"\n",
"## To do:\n",
"1. For each enrolled speaker:\n",
" 2. For feature-set of every probe-sample in 'eval' set:\n",
" 3. Determine if sample came from enrolled speaker"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"code_dir = os.getcwd()\n",
"print(code_dir)\n",
"assert os.path.isdir(\"data\"), \"PANIC! 'data' folder does not exist!\"\n",
"os.chdir(\"data\")\n",
"\n",
"print(os.getcwd())\n",
"os.chdir(code_dir)\n",
"print(os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re, sys\n",
"import numpy\n",
"from pathlib import Path\n",
"\n",
"my_file_name = \"data/voxforge_denoised_scores_dev.txt\"\n",
"%print(\"My file is %s\" %my_file_name)\n",
"my_file = Path(my_file_name)\n",
"assert my_file.is_file(), \"File %s does not exist. Quitting.\" %my_file_name\n",
"\n",
"dsf = open(\"data/voxforge_denoised_scores_dev.txt\", \"r\")\n",
"x = dsf.readlines()\n",
"dsf.close()\n",
"\n",
"dev_gen_scores = [float(line.split()[3]) for line in x if line.split()[0] == line.split()[1]]\n",
"dev_zei_scores = [float(line.split()[3]) for line in x if line.split()[0] != line.split()[1]]\n",
"\n",
"dev_genuine_scores = numpy.array(dev_gen_scores)\n",
"dev_zero_effort_impostor_scores = numpy.array(dev_zei_scores)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Score Distribution for 'dev' set"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"import plots\n",
"import numpy\n",
"\n",
"dev_fmr_01, dev_fmr_001 = plots.plot_hist(dev_zero_effort_impostor_scores, dev_genuine_scores)\n",
"\n",
"print(\"Threshold for FMR=0.1% on dev-set:\" '{:6.2f}'.format(dev_fmr_01) )\n",
"print(\"Threshold for FMR=0.01% on dev-set:\" '{:6.2f}'.format(dev_fmr_001))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plots.plot_det(dev_zero_effort_impostor_scores, dev_genuine_scores)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"import re\n",
"import sys\n",
"import os.path\n",
"import bob.bio.gmm\n",
"from bob.db.voxforge import Database\n",
"import numpy\n",