Commit b60aad0b authored by Tiago de Freitas Pereira

Merge branch 'pickle' into 'master'

Pickling Objects

See merge request !39
parents 4cfcee92 359b8703
Pipeline #44973 failed in 5 minutes and 9 seconds
......@@ -13,6 +13,9 @@ from ._library import *
from ._library import GMMMachine as _GMMMachine_C
from ._library import ISVBase as _ISVBase_C
from ._library import ISVMachine as _ISVMachine_C
from ._library import KMeansMachine as _KMeansMachine_C
from ._library import GMMStats as _GMMStats_C
from ._library import IVectorMachine as _IVectorMachine_C
from . import version
from .version import module as __version__
......@@ -20,22 +23,6 @@ from .version import api as __api_version__
from .train import *
def ztnorm_same_value(vect_a, vect_b):
"""Computes the matrix of boolean D for the ZT-norm, which indicates where
the client ids of the T-Norm models and Z-Norm samples match.
vect_a An (ordered) list of client_id corresponding to the T-Norm models
vect_b An (ordered) list of client_id corresponding to the Z-Norm impostor samples
"""
import numpy
sameMatrix = numpy.ndarray((len(vect_a), len(vect_b)), "bool")
for j in range(len(vect_a)):
for i in range(len(vect_b)):
sameMatrix[j, i] = vect_a[j] == vect_b[i]
return sameMatrix
def get_config():
"""Returns a string containing the configuration information.
"""
......@@ -52,7 +39,7 @@ class GMMMachine(_GMMMachine_C):
def update_dict(self, d):
self.means = d["means"]
self.variances = d["variances"]
self.weights = d["weights"]
@staticmethod
def gmm_shape_from_dict(d):
......@@ -149,3 +136,80 @@ class ISVMachine(_ISVMachine_C):
def __setstate__(self, d):
self.__dict__ = d
self.update_dict(d)
class KMeansMachine(_KMeansMachine_C):
__doc__ = _KMeansMachine_C.__doc__
@staticmethod
def to_dict(kmeans_machine):
kmeans_data = dict()
kmeans_data["means"] = kmeans_machine.means
return kmeans_data
def __getstate__(self):
d = dict(self.__dict__)
d.update(self.__class__.to_dict(self))
return d
def __setstate__(self, d):
means = d["means"]
self.__init__(means.shape[0], means.shape[1])
self.means = means
class GMMStats(_GMMStats_C):
__doc__ = _GMMStats_C.__doc__
@staticmethod
def to_dict(gmm_stats):
gmm_stats_data = dict()
gmm_stats_data["log_likelihood"] = gmm_stats.log_likelihood
gmm_stats_data["t"] = gmm_stats.t
gmm_stats_data["n"] = gmm_stats.n
gmm_stats_data["sum_px"] = gmm_stats.sum_px
gmm_stats_data["sum_pxx"] = gmm_stats.sum_pxx
return gmm_stats_data
def __getstate__(self):
d = dict(self.__dict__)
d.update(self.__class__.to_dict(self))
return d
def __setstate__(self, d):
shape = d["sum_pxx"].shape
self.__init__(shape[0], shape[1])
self.t = d["t"]
self.n = d["n"]
self.log_likelihood = d["log_likelihood"]
self.sum_px = d["sum_px"]
self.sum_pxx = d["sum_pxx"]
class IVectorMachine(_IVectorMachine_C):
__doc__ = _IVectorMachine_C.__doc__
@staticmethod
def to_dict(ivector_machine):
ivector_data = dict()
ivector_data["gmm"] = GMMMachine.to_dict(ivector_machine.ubm)
ivector_data["sigma"] = ivector_machine.sigma
ivector_data["t"] = ivector_machine.t
return ivector_data
def update_dict(self, d):
ubm = GMMMachine.create_from_dict(d["gmm"])
t = d["t"]
self.__init__(ubm, t.shape[1])
self.sigma = d["sigma"]
self.t = t
def __getstate__(self):
d = dict(self.__dict__)
d.update(self.__class__.to_dict(self))
return d
def __setstate__(self, d):
self.__dict__ = d
self.update_dict(d)
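Taken together, these __getstate__/__setstate__ pairs are what make the wrapped C types picklable. A minimal sketch of the round trip they enable, assuming bob.learn.em is importable (it mirrors the tests further down):

import pickle

import numpy
from bob.learn.em import KMeansMachine

machine = KMeansMachine(2, 3)  # 2 means of dimension 3
machine.means = numpy.arange(6).reshape(2, 3).astype("float")

# pickle.dumps() calls __getstate__; pickle.loads() rebuilds the object
# through __setstate__, which re-runs __init__ with the shape recovered
# from the stored means
restored = pickle.loads(pickle.dumps(machine))
assert numpy.allclose(restored.means, machine.means)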
......@@ -648,7 +648,7 @@ bool init_BobLearnEMGMMStats(PyObject* module)
// initialize the type struct
PyBobLearnEMGMMStats_Type.tp_name = GMMStats_doc.name();
PyBobLearnEMGMMStats_Type.tp_basicsize = sizeof(PyBobLearnEMGMMStatsObject);
PyBobLearnEMGMMStats_Type.tp_flags = Py_TPFLAGS_DEFAULT;
PyBobLearnEMGMMStats_Type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE;
PyBobLearnEMGMMStats_Type.tp_doc = GMMStats_doc.doc();
// set the functions
......
......@@ -651,7 +651,7 @@ bool init_BobLearnEMIVectorMachine(PyObject* module)
// initialize the type struct
PyBobLearnEMIVectorMachine_Type.tp_name = IVectorMachine_doc.name();
PyBobLearnEMIVectorMachine_Type.tp_basicsize = sizeof(PyBobLearnEMIVectorMachineObject);
PyBobLearnEMIVectorMachine_Type.tp_flags = Py_TPFLAGS_DEFAULT;
PyBobLearnEMIVectorMachine_Type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE;
PyBobLearnEMIVectorMachine_Type.tp_doc = IVectorMachine_doc.doc();
// set the functions
......
......@@ -843,7 +843,7 @@ bool init_BobLearnEMKMeansMachine(PyObject* module)
// initialize the type struct
PyBobLearnEMKMeansMachine_Type.tp_name = KMeansMachine_doc.name();
PyBobLearnEMKMeansMachine_Type.tp_basicsize = sizeof(PyBobLearnEMKMeansMachineObject);
PyBobLearnEMKMeansMachine_Type.tp_flags = Py_TPFLAGS_DEFAULT;
PyBobLearnEMKMeansMachine_Type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE;
PyBobLearnEMKMeansMachine_Type.tp_doc = KMeansMachine_doc.doc();
// set the functions
......
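All three C++ hunks make the same change: adding Py_TPFLAGS_BASETYPE to tp_flags, which is what permits the Python-side subclassing in __init__.py above. A sketch of what the flag enables (the TypeError text is the standard CPython message, quoted from memory):

from bob.learn.em._library import GMMStats as _GMMStats_C

# Before this change, the class statement below would fail at import
# time with something like:
#   TypeError: type 'bob.learn.em.GMMStats' is not an acceptable base type
class GMMStats(_GMMStats_C):
    pass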
......@@ -2,26 +2,39 @@
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
from bob.learn.em import GMMMachine, ISVBase, ISVMachine
from bob.learn.em import (
GMMMachine,
ISVBase,
ISVMachine,
KMeansMachine,
GMMStats,
IVectorMachine,
)
import numpy
import pickle
def test_gmm_machine():
gmm_machine = GMMMachine(3,3)
gmm_machine.means = numpy.arange(9).reshape(3,3).astype("float")
gmm_machine = GMMMachine(3, 3)
gmm_machine.means = numpy.arange(9).reshape(3, 3).astype("float")
gmm_machine_after_pickle = pickle.loads(pickle.dumps(gmm_machine))
assert numpy.allclose(gmm_machine_after_pickle.means, gmm_machine_after_pickle.means, 10e-3)
assert numpy.allclose(gmm_machine_after_pickle.variances, gmm_machine_after_pickle.variances, 10e-3)
assert numpy.allclose(gmm_machine_after_pickle.weights, gmm_machine_after_pickle.weights, 10e-3)
assert numpy.allclose(
    gmm_machine_after_pickle.means, gmm_machine.means, 10e-3
)
assert numpy.allclose(
    gmm_machine_after_pickle.variances, gmm_machine.variances, 10e-3
)
assert numpy.allclose(
    gmm_machine_after_pickle.weights, gmm_machine.weights, 10e-3
)
def test_isv_base():
ubm = GMMMachine(3,3)
ubm.means = numpy.arange(9).reshape(3,3).astype("float")
ubm = GMMMachine(3, 3)
ubm.means = numpy.arange(9).reshape(3, 3).astype("float")
isv_base = ISVBase(ubm, 2)
isv_base.u = numpy.arange(18).reshape(9,2).astype("float")
isv_base.u = numpy.arange(18).reshape(9, 2).astype("float")
isv_base.d = numpy.arange(9).astype("float")
isv_base_after_pickle = pickle.loads(pickle.dumps(isv_base))
......@@ -33,31 +46,97 @@ def test_isv_base():
def test_isv_machine():
# Creates a UBM
weights = numpy.array([0.4, 0.6], 'float64')
means = numpy.array([[1, 6, 2], [4, 3, 2]], 'float64')
variances = numpy.array([[1, 2, 1], [2, 1, 2]], 'float64')
ubm = GMMMachine(2,3)
weights = numpy.array([0.4, 0.6], "float64")
means = numpy.array([[1, 6, 2], [4, 3, 2]], "float64")
variances = numpy.array([[1, 2, 1], [2, 1, 2]], "float64")
ubm = GMMMachine(2, 3)
ubm.weights = weights
ubm.means = means
ubm.variances = variances
# Creates a ISVBaseMachine
U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'float64')
#V = numpy.array([[0], [0], [0], [0], [0], [0]], 'float64')
d = numpy.array([0, 1, 0, 1, 0, 1], 'float64')
base = ISVBase(ubm,2)
U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], "float64")
# V = numpy.array([[0], [0], [0], [0], [0], [0]], 'float64')
d = numpy.array([0, 1, 0, 1, 0, 1], "float64")
base = ISVBase(ubm, 2)
base.u = U
base.d = d
# Creates a ISVMachine
z = numpy.array([3,4,1,2,0,1], 'float64')
x = numpy.array([1,2], 'float64')
z = numpy.array([3, 4, 1, 2, 0, 1], "float64")
x = numpy.array([1, 2], "float64")
isv_machine = ISVMachine(base)
isv_machine.z = z
isv_machine.x = x
isv_machine_after_pickle = pickle.loads(pickle.dumps(isv_machine))
assert numpy.allclose(isv_machine_after_pickle.isv_base.u, isv_machine.isv_base.u, 10e-3)
assert numpy.allclose(isv_machine_after_pickle.isv_base.d, isv_machine.isv_base.d, 10e-3)
assert numpy.allclose(
isv_machine_after_pickle.isv_base.u, isv_machine.isv_base.u, 10e-3
)
assert numpy.allclose(
isv_machine_after_pickle.isv_base.d, isv_machine.isv_base.d, 10e-3
)
assert numpy.allclose(isv_machine_after_pickle.x, isv_machine.x, 10e-3)
assert numpy.allclose(isv_machine_after_pickle.z, isv_machine.z, 10e-3)
def test_kmeans_machine():
# Test a KMeansMachine
means = numpy.array([[3, 70, 0], [4, 72, 0]], "float64")
mean = numpy.array([3, 70, 1], "float64")
# Initializes a KMeansMachine
kmeans_machine = KMeansMachine(2, 3)
kmeans_machine.means = means
kmeans_machine_after_pickle = pickle.loads(pickle.dumps(kmeans_machine))
assert numpy.allclose(
kmeans_machine_after_pickle.means, kmeans_machine.means, 10e-3
)
def test_gmmstats():
gs = GMMStats(2, 3)
log_likelihood = -3.0
T = 1
n = numpy.array([0.4, 0.6], numpy.float64)
sumpx = numpy.array([[1.0, 2.0, 3.0], [2.0, 4.0, 3.0]], numpy.float64)
sumpxx = numpy.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], numpy.float64)
gs.log_likelihood = log_likelihood
gs.t = T
gs.n = n
gs.sum_px = sumpx
gs.sum_pxx = sumpxx
gs_after_pickle = pickle.loads(pickle.dumps(gs))
assert gs == gs_after_pickle
def test_ivector_machine():
# Ubm
ubm = GMMMachine(2, 3)
ubm.weights = numpy.array([0.4, 0.6])
ubm.means = numpy.array([[1.0, 7, 4], [4, 5, 3]])
ubm.variances = numpy.array([[0.5, 1.0, 1.5], [1.0, 1.5, 2.0]])
ivector_machine = IVectorMachine(ubm, 2)
t = numpy.array([[1.0, 2], [4, 1], [0, 3], [5, 8], [7, 10], [11, 1]])
sigma = numpy.array([1.0, 2.0, 1.0, 3.0, 2.0, 4.0])
ivector_machine.t = t
ivector_machine.sigma = sigma
ivector_after_pickle = pickle.loads(pickle.dumps(ivector_machine))
assert numpy.allclose(ivector_after_pickle.sigma, ivector_machine.sigma, 10e-3)
assert numpy.allclose(ivector_after_pickle.t, ivector_machine.t, 10e-3)
assert numpy.allclose(
ivector_after_pickle.ubm.means, ivector_machine.ubm.means, 10e-3
)
assert numpy.allclose(
ivector_after_pickle.ubm.variances, ivector_machine.ubm.variances, 10e-3
)
assert numpy.allclose(
ivector_after_pickle.ubm.weights, ivector_machine.ubm.weights, 10e-3
)
......@@ -234,7 +234,9 @@ from bob.extension.utils import link_documentation, load_requirements
sphinx_requirements = "extra-intersphinx.txt"
if os.path.exists(sphinx_requirements):
intersphinx_mapping = link_documentation(
additional_packages=['python', 'numpy'] + load_requirements(sphinx_requirements))
additional_packages=['python', 'numpy'] + \
load_requirements(sphinx_requirements)
)
else:
intersphinx_mapping = link_documentation()
......
......@@ -695,97 +695,6 @@ computed, which is defined in more formal way by:
shutil.rmtree(temp_dir)
Score Normalization
-------------------
Score normalization aims to compensate for statistical variations in output
scores due to changes in the conditions across different enrollment and probe
samples. This is achieved by scaling the distributions of system output scores
so that a single, global authentication threshold can be applied. Bob
implements three score normalization strategies, presented in the next
subsections.
Z-Norm
======
.. _znorm:
Given a score :math:`s_i`, Z-Norm [Auckenthaler2000]_ and [Mariethoz2005]_
(zero-normalization) scales this value by the mean (:math:`\mu`) and standard
deviation (:math:`\sigma`) of an impostor score distribution. This score
distribution can be computed beforehand, and the normalization is defined as
follows:
.. math::

   zs_i = \frac{s_i - \mu}{\sigma}
This scoring technique is implemented in :py:func:`bob.learn.em.znorm`. An
example of score normalization using :py:func:`bob.learn.em.znorm` follows
below.
.. plot:: plot/plot_Znorm.py
:include-source: True
.. note::
Observe how the scores were scaled in the plot above.
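A minimal numpy sketch of the formula above (illustrative only; this is not
the :py:func:`bob.learn.em.znorm` API, and the impostor scores are made up)::

   import numpy

   # impostor score distribution, computed beforehand
   impostor_scores = numpy.array([0.8, 1.1, 0.9, 1.2, 1.0])
   mu, sigma = impostor_scores.mean(), impostor_scores.std()

   s = 1.5                # a raw score s_i
   zs = (s - mu) / sigma  # zs_i, as in the equation above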
T-Norm
======
.. _tnorm:
T-Norm [Auckenthaler2000]_ and [Mariethoz2005]_ (test-normalization) operates
in a probe-centric manner. Whereas in the Z-Norm :math:`\mu` and
:math:`\sigma` are estimated using an impostor set of models and their scores,
the T-Norm computes these statistics by scoring the current probe sample
against a set of models in a cohort :math:`\Theta_{c}`. A cohort can be any
semantic grouping that is sensible for your recognition task, such as sex,
ethnicity or age, and the normalization is defined as follows:
.. math::

   ts_i = \frac{s_i - \mu}{\sigma}

where :math:`s_i` is :math:`P(x_i | \Theta)` (the score given the claimed
model), :math:`\mu = \frac{ \sum\limits_{i=0}^{N} P(x_i | \Theta_{c}) }{N}`
(:math:`\Theta_{c}` are the models of one cohort) and :math:`\sigma` is the
standard deviation computed using the same criterion as :math:`\mu`.
This scoring technique is implemented in :py:func:`bob.learn.em.tnorm`. An
example of score normalization using :py:func:`bob.learn.em.tnorm` follows
below.
.. plot:: plot/plot_Tnorm.py
:include-source: True
.. note::

   T-Norm introduces extra computation during scoring, as each probe sample
   needs to be compared to every cohort model in order to obtain :math:`\mu`
   and :math:`\sigma`.
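The T-Norm statistics can be sketched the same way (again illustrative only,
not the :py:func:`bob.learn.em.tnorm` API; the cohort scores are made up)::

   import numpy

   # scores of the current probe against the cohort models, P(x_i | Theta_c)
   cohort_scores = numpy.array([0.7, 1.3, 0.9, 1.1])
   mu, sigma = cohort_scores.mean(), cohort_scores.std()

   s = 1.5                # score of the probe against the claimed model
   ts = (s - mu) / sigma  # ts_i, as in the equation above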
ZT-Norm
=======
.. _ztnorm:
ZT-Norm [Auckenthaler2000]_ and [Mariethoz2005]_ consists of applying
:ref:`Z-Norm <znorm>` followed by :ref:`T-Norm <tnorm>`, and is implemented
in :py:func:`bob.learn.em.ztnorm`. An example of score normalization using
:py:func:`bob.learn.em.ztnorm` follows below.
.. plot:: plot/plot_ZTnorm.py
:include-source: True
.. note::
Observe how the scores were scaled in the plot above.
.. Place here your external references
.. include:: links.rst
......
......@@ -49,10 +49,6 @@ References
.. [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
.. [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
.. [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE signal processing letters 12.7 (2005): 532-535.
Indices and tables
------------------
......
py:class bob.learn.em.GMMStats.n
py:class bob.learn.em.GMMStats.sum_px
py:class bob.learn.em.GMMStats.sum_pxx
\ No newline at end of file
......@@ -49,12 +49,8 @@ Functions
.. autosummary::
bob.learn.em.linear_scoring
bob.learn.em.tnorm
bob.learn.em.train
bob.learn.em.train_jfa
bob.learn.em.znorm
bob.learn.em.ztnorm
bob.learn.em.ztnorm_same_value
Detailed Information
--------------------
......