Commit 90564357 authored by Manuel Günther's avatar Manuel Günther

Improved and uniformized the documentation

parent 1f581e3f
......@@ -13,7 +13,9 @@ from . import openbr
import numpy
def mse (estimation, target):
"""Calculates the mean square error between a set of outputs and target
"""mse(estimation, target) -> error
Calculates the mean square error between a set of outputs and target
values using the following formula:
.. math::
......@@ -28,7 +30,9 @@ def mse (estimation, target):
return numpy.mean((estimation - target)**2, 0)
def rmse (estimation, target):
"""Calculates the root mean square error between a set of outputs and target
"""rmse(estimation, target) -> error
Calculates the root mean square error between a set of outputs and target
values using the following formula:
.. math::
......@@ -43,12 +47,14 @@ def rmse (estimation, target):
return numpy.sqrt(mse(estimation, target))
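A quick illustration (editor's sketch, not part of the commit; the arrays are hypothetical): both functions average column-wise, returning one error value per output dimension.

import numpy
estimation = numpy.array([[0.9, 0.1], [0.2, 0.8]])  # two samples, two outputs
target = numpy.array([[1.0, 0.0], [0.0, 1.0]])
print(mse(estimation, target))   # [ 0.025  0.025]
print(rmse(estimation, target))  # [ 0.1581...  0.1581...]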
def relevance (input, machine):
"""Calculates the relevance of every input feature to the estimation process
"""relevance (input, machine) -> relevances
Calculates the relevance of every input feature to the estimation process
using the following definition from:
Neural Triggering System Operating on High Resolution Calorimetry
Information, Anjos et al, April 2006, Nuclear Instruments and Methods in
Physics Research, volume 559, pages 134-138
.. math::
......@@ -73,10 +79,12 @@ def relevance (input, machine):
return retval
def recognition_rate(cmc_scores):
"""Calculates the recognition rate from the given input, which is identical
"""recognition_rate(cmc_scores) -> RR
Calculates the recognition rate from the given input, which is identical
to the rank 1 (C)MC value.
The input has a specific format, which is a list of two-element tuples. Each
of the tuples contains the negative and the positive scores for one test
item. To read the lists from score files in 4 or 5 column format, please use
the :py:func:`bob.measure.load.cmc_four_column` or
......@@ -86,10 +94,20 @@ def recognition_rate(cmc_scores):
positive score is greater than or equal to all negative scores, divided by
the number of all test items. If several positive scores for one test item
exist, the **highest** score is taken.
**Parameters:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
**Returns:**
``RR`` : float
The rank 1 recognition rate, i.e., the relative number of correctly identified identities
"""
# If no scores are given, the recognition rate is exactly 0.
if not cmc_scores:
return 0.
correct = 0.
for neg, pos in cmc_scores:
......@@ -98,15 +116,17 @@ def recognition_rate(cmc_scores):
max_pos = numpy.max(pos)
# check if the highest positive score is greater than all negative scores
if (neg < max_pos).all():
correct += 1.
# return relative number of correctly matched scores
return correct / float(len(cmc_scores))
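A worked sketch with hypothetical scores: per the loop above, a probe is counted as correct only when its highest positive score beats every negative score.

import numpy
cmc_scores = [
    (numpy.array([0.1, 0.2]), numpy.array([0.9])),  # best positive beats all negatives -> correct
    (numpy.array([0.5, 0.8]), numpy.array([0.4])),  # one negative outscores the positive -> incorrect
]
assert recognition_rate(cmc_scores) == 0.5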
def cmc(cmc_scores):
"""Calculates the cumulative match characteristic (CMC) from the given input.
"""cmc(cmc_scores) -> curve
Calculates the cumulative match characteristic (CMC) from the given input.
The input has a specific format, which is a list of two-element tuples. Each
of the tuples contains the negative and the positive scores for one test
item. To read the lists from score files in 4 or 5 column format, please use
the :py:func:`bob.measure.load.cmc_four_column` or
......@@ -117,6 +137,16 @@ def cmc(cmc_scores):
higher than the positive score. If several positive scores for one test item
exist, the **highest** positive score is taken. The CMC finally computes how
many test items have rank r or better.
**Parameters:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
**Returns:**
``curve`` : array_like(2D, float)
The CMC curve, with the Rank in the first column and the number of correctly classified clients (in this rank) in the second column.
"""
# If no scores are given, we cannot plot anything
......@@ -147,6 +177,7 @@ def cmc(cmc_scores):
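The committed body of the function is elided above; as a hedged sketch of the underlying idea (not the actual implementation, and tie handling may differ), the rank of a single probe could be computed as:

import numpy
def rank_of_probe(neg, pos):
    # one plus the number of negative scores at least as high as the
    # best positive score of this probe (hypothetical helper)
    return int((numpy.asarray(neg) >= numpy.max(pos)).sum()) + 1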
def get_config():
"""Returns a string containing the configuration information.
"""
import bob.extension
return bob.extension.get_config(__name__, version.externals)
......
......@@ -11,7 +11,20 @@ import math
import numpy
def cllr(negatives, positives):
"""Computes the 'cost of log likelihood ratio' measure as given in the bosaris toolkit"""
"""cllr(negatives, positives) -> cllr
Computes the 'cost of log likelihood ratio' (:math:`C_{llr}`) measure as given in the Bosaris toolkit
**Parameters:**
``negatives, positives`` : array_like(1D, float)
The scores computed by comparing elements from different classes and the same class, respectively.
**Returns:**
``cllr`` : float
The computed :math:`C_{llr}` value.
"""
sum_pos, sum_neg = 0., 0.
for pos in positives:
sum_pos += math.log(1. + math.exp(-pos), 2.)
......@@ -21,7 +34,20 @@ def cllr(negatives, positives):
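The remainder of the loop is elided above; for reference, a vectorized sketch of the same quantity, assuming the elided code follows the standard Bosaris definition (note that math.log(x, 2.) equals numpy.log2(x)):

import numpy
def cllr_sketch(negatives, positives):
    # mean of log2(1 + exp(-s)) over positives plus
    # mean of log2(1 + exp(s)) over negatives, halved
    pos = numpy.asarray(positives, numpy.float64)
    neg = numpy.asarray(negatives, numpy.float64)
    return (numpy.mean(numpy.log2(1. + numpy.exp(-pos))) +
            numpy.mean(numpy.log2(1. + numpy.exp(neg)))) / 2.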
def min_cllr(negatives, positives):
"""Computes the 'minimum cost of log likelihood ratio' measure as given in the bosaris toolkit"""
"""cllr(negatives, positives) -> cllr
Computes the 'minimum cost of log likelihood ratio' (:math:`C_{llr}^{min}`) measure as given in the bosaris toolkit
**Parameters:**
``negatives, positives`` : array_like(1D, float)
The scores computed by comparing elements from different classes and the same class, respectively.
**Returns:**
``min_cllr`` : float
The computed :math:`C_{llr}^{min}` value.
"""
from bob.math import pavx
......
......@@ -11,16 +11,20 @@ import tarfile
import os
def open_file(filename):
"""Opens the given score file for reading.
"""open_file(filename) -> file_like
Opens the given score file for reading.
Score files might be raw text files, or a tar file containing a single score file.
**Parameters:**
``filename`` : str or file-like
The name of the score file to open, or a file-like object open for reading.
If a file name is given, the corresponding file may be a raw text file or a (compressed) tar file containing a raw text file.
**Returns:**
``file_like`` : file-like
A read-only file-like object as it would be returned by open().
"""
if not isinstance(filename, str) and hasattr(filename, 'read'):
......@@ -47,20 +51,32 @@ def open_file(filename):
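A usage sketch (the file names are hypothetical): the function transparently handles plain text files, tar archives, and already-open file objects.

f1 = open_file('scores-dev.txt')        # raw text file
f2 = open_file('scores-dev.tar.gz')     # (compressed) tar containing one score file
f3 = open_file(open('scores-dev.txt'))  # file-like objects are passed through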
def four_column(filename):
"""Loads a score set from a single file to memory.
"""four_column(filename) -> claimed_id, real_id, test_label, score
Loads a score set from a single file and yields its lines (to avoid loading the whole score file into memory at once).
This function verifies that all fields are correctly placed and contain valid fields.
The score file must contain the following information in each line:
claimed_id real_id test_label score
**Parameters:**
``filename`` : str or file-like
The file object that will be opened with :py:func:`open_file` containing the scores.
**Yields:**
``claimed_id`` : str
The claimed identity -- the client name of the model that was used in the comparison
``real_id`` : str
The real identity -- the client name of the probe that was used in the comparison
``test_label`` : str
A label of the probe -- usually the probe file name, or the probe id
``score`` : float
The result of the comparison of the model and the probe
"""
for i, l in enumerate(open_file(filename)):
......@@ -78,17 +94,28 @@ def four_column(filename):
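For illustration (hypothetical file name), the generator can be consumed line by line:

for claimed_id, real_id, test_label, score in four_column('scores-dev.txt'):
    if claimed_id == real_id:
        print('genuine comparison %s: %f' % (test_label, score))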
def split_four_column(filename):
"""Loads a score set from a single file to memory and splits the scores
between positives and negatives. The score file has to respect the 4 column
format as defined in the method four_column().
"""split_four_column(filename) -> negatives, positives
Loads a score set from a single file and splits the scores
between negatives and positives. The score file has to respect the 4 column
format as defined in the method :py:func:`four_column`.
This method avoids loading and allocating memory for the strings present in
the file. We only keep the scores.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``negatives`` : array_like(1D, float)
The list of ``score``'s, for which the ``claimed_id`` and the ``real_id`` differed (see :py:func:`four_column`).
``positives`` : array_like(1D, float)
The list of ``score``'s, for which the ``claimed_id`` and the ``real_id`` are identical (see :py:func:`four_column`).
"""
# split in positives and negatives
neg = []
pos = []
......@@ -102,15 +129,26 @@ def split_four_column(filename):
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
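A usage sketch (hypothetical file name); the returned arrays feed directly into the error measures of this package:

negatives, positives = split_four_column('scores-dev.txt')
# both are 1D numpy.float64 arrays, e.g. for bob.measure.farfrr(negatives, positives, 0.0)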
def cmc_four_column(filename):
"""Loads scores to compute CMC curves from a file in four column format.
The four column file needs to be in the same format as described in the four_column function,
and the "test label" (column 3) has to contain the test/probe file name.
"""cmc_four_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in four column format.
The four column file needs to be in the same format as described in :py:func:`four_column`,
and the ``test_label`` (column 3) has to contain the test/probe file name or a probe id.
This function returns a list of tuples.
For each probe file, the tuple consists of a list of negative scores and a list of positive scores.
Usually, the list of positive scores should contain only one element, but more are allowed.
The result of this function can directly be passed to, e.g., the :py:func:`bob.measure.cmc` function.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
"""
# extract positives and negatives
pos_dict = {}
......@@ -149,22 +187,35 @@ def cmc_four_column(filename):
return retval
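As noted above, the result can be passed straight to the CMC routines (hypothetical file name):

import bob.measure
cmc_scores = bob.measure.load.cmc_four_column('scores-dev.txt')
print(bob.measure.recognition_rate(cmc_scores))  # rank-1 value
curve = bob.measure.cmc(cmc_scores)              # full CMC curve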
def five_column(filename):
"""Loads a score set from a single file to memory.
Verifies that all fields are correctly placed and contain valid fields.
Returns a python generator of tuples containing the following fields:
[0]
claimed identity (string)
[1]
model label (string)
[2]
real identity (string)
[3]
test label (string)
[4]
score (float)
"""five_column(filename) -> claimed_id, model_label, real_id, test_label, score
Loads a score set from a single file and yields its lines (to avoid loading the whole score file into memory at once).
This function verifies that all fields are correctly placed and contain valid fields.
The score file must contain the following information in each line:
claimed_id model_label real_id test_label score
**Parameters:**
``filename`` : str or file-like
The file object that will be opened with :py:func:`open_file` containing the scores.
**Yields:**
``claimed_id`` : str
The claimed identity -- the client name of the model that was used in the comparison
``model_label`` : str
A label for the model -- usually the model file name, or the model id
``real_id`` : str
The real identity -- the client name of the probe that was used in the comparison
``test_label`` : str
A label of the probe -- usually the probe file name, or the probe id
``score`` : float
The result of the comparison of the model and the probe.
"""
for i, l in enumerate(open_file(filename)):
......@@ -181,15 +232,27 @@ def five_column(filename):
yield (field[0], field[1], field[2], field[3], score)
def split_five_column(filename):
"""Loads a score set from a single file to memory and splits the scores
between positives and negatives. The score file has to respect the 5 column
format as defined in the method five_column().
"""split_five_column(filename) -> negatives, positives
Loads a score set from a single file in five column format and splits the scores
between negatives and positives. The score file has to respect the 5 column
format as defined in the method :py:func:`five_column`.
This method avoids loading and allocating memory for the strings present in
the file. We only keep the scores.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``negatives`` : array_like(1D, float)
The list of ``score``'s, for which the ``claimed_id`` and the ``real_id`` differed (see :py:func:`five_column`).
``positives`` : array_like(1D, float)
The list of ``score``'s, for which the ``claimed_id`` and the ``real_id`` are identical (see :py:func:`five_column`).
"""
# split in positives and negatives
......@@ -205,15 +268,26 @@ def split_five_column(filename):
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
def cmc_five_column(filename):
"""Loads scores to compute CMC curves from a file in five column format.
The four column file needs to be in the same format as described in the five_column function,
and the "test label" (column 4) has to contain the test/probe file name.
"""cmc_four_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in five column format.
The five column file needs to be in the same format as described in :py:func:`five_column`,
and the ``test_label`` (column 4) has to contain the test/probe file name or a probe id.
This function returns a list of tuples.
For each probe file, the tuple consists of a list of negative scores and a list of positive scores.
Usually, the list of positive scores should contain only one element, but more are allowed.
The result of this function can directly be passed to, e.g., the :py:func:`bob.measure.cmc` function.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
"""
# extract positives and negatives
pos_dict = {}
......
......@@ -25,23 +25,23 @@ def write_matrix(
If ``search`` is given (as an integer), the resulting matrix files will be in the *search* format, keeping the given number of gallery scores with the highest values for each probe.
.. warning::
When provided with a 4-column score file, this function will only work if there is a single model id for each client.
**Parameters:**
``score_file`` : str
The 4 or 5 column style score file written by bob.
``matrix_file`` : str
The OpenBR matrix file that should be written.
Usually, the file name extension is ``.mtx``
``mask_file`` : str
The OpenBR mask file that should be written.
The mask file defines which values are positives, negatives, or to be ignored.
Usually, the file name extension is ``.mask``
``model_names`` : [str] or ``None``
If given, the matrix will be written in the same order as the given model names.
The model names must be identical with the second column in the 5-column ``score_file``.
......@@ -51,21 +51,21 @@ def write_matrix(
Only the scores of the given models will be considered.
``probe_names`` : [str] or ``None``
If given, the matrix will be written in the same order as the given probe names (the ``path`` of the probe).
The probe names are identical to the third column of the 4-column (or the fourth column of the 5-column) ``score_file``.
Only the scores of the given probe names will be considered in this case.
``score_file_format`` : one of ``('4column', '5column')``
The format in which the ``score_file`` is given; defaults to ``'4column'``.
``gallery_file_name`` : str
The name of the gallery file that will be written in the header of the OpenBR files.
``probe_file_name`` : str
The name of the probe file that will be written in the header of the OpenBR files.
``search`` : int or ``None``
If given, the scores will be sorted per probe, keeping the specified number of highest scores.
If the given number is higher than the number of models, ``NaN`` values will be added, and the mask will contain ``0x00`` values.
"""
......@@ -119,8 +119,8 @@ def write_matrix(
for line in read_function(score_file):
client, model, id, probe, score = line[0], line[offset], line[1+offset], line[2+offset], line[3+offset]
assert model in model_dict, "model " + model + " unknown"
assert probe in probe_dict, "probe " + probe + " unknown"
model_index = model_dict[model]
probe_index = probe_dict[probe]
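A hedged usage sketch (hypothetical file names; only the three leading arguments are shown, all other parameters keep their defaults):

from bob.measure.openbr import write_matrix
write_matrix('scores-dev.txt', 'scores-dev.mtx', 'scores-dev.mask')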
......@@ -191,46 +191,46 @@ def write_score_file(
This is what OpenBR does as well.
However, you can also set ``replace_nan`` to any value, which will be written instead of the NaN values.
**Parameters:**
``matrix_file`` : str
The OpenBR matrix file that should be read.
Usually, the file name extension is ``.mtx``
``mask_file`` : str
The OpenBR mask file that should be read.
Usually, the file name extension is ``.mask``
``score_file`` : str
The 4 or 5 column style score file that should be written.
``models_ids`` : [str] or ``None``
The client ids of the models that will be written in the first column of the score file.
If given, the size must be identical to the number of models (gallery templates) in the OpenBR files.
If not given, client ids of the model will be identical to the **gallery index** in the matrix file.
``probes_ids`` : [str] or ``None``
The client ids of the probes that will be written in the second/third column of the four/five column score file.
If given, the size must be identical to the number of probe templates in the OpenBR files.
It will be checked that the OpenBR mask fits to the model/probe client ids.
If not given, the probe ids will be estimated automatically, i.e., to fit the OpenBR matrix.
``model_names`` : [str] or ``None``
A list of model path written in the second column of the five column score file.
If not given, the model index in the OpenBR file will be used.
.. note::
This entry is ignored in the four column score file format.
``probe_names`` : [str] or ``None``
A list of probe path to be written in the third/fourth column in the four/five column score file.
If given, the size must be identical to the number of probe templates in the OpenBR files.
If not given, the probe index in the OpenBR file will be used.
``score_file_format`` : one of ``('4column', '5column')``
The format in which the ``score_file`` should be written; defaults to ``'4column'``.
``replace_nan`` : float or ``None``
If NaN values are encountered in the OpenBR matrix (which are not ignored due to the mask being non-NULL), this value will be written instead.
If ``None``, the values will not be written in the score file at all.
"""
......@@ -301,7 +301,6 @@ def write_score_file(
if probe_names is None:
probe_names = [str(p+1) for p in range(mask.shape[0])]
with open(score_file, 'w') as f:
for g in range(mask.shape[1]):
......
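And a matching sketch for the reverse direction, assuming the same positional order as in the docstring above (file names again hypothetical):

from bob.measure.openbr import write_score_file
write_score_file('scores-dev.mtx', 'scores-dev.mask', 'scores-dev.txt')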
......@@ -125,6 +125,9 @@ def test_thresholding():
negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
threshold = eer_threshold(negatives, positives)
sorted_positives = numpy.sort(positives)
sorted_negatives = numpy.sort(negatives)
# Of course we have to make sure that we set the EER correctly:
ccp = count(correctly_classified_positives(positives,threshold))
ccn = count(correctly_classified_negatives(negatives,threshold))
......@@ -132,8 +135,8 @@ def test_thresholding():
for t in (0, 0.001, 0.1, 0.5, 0.9, 0.999, 1):
# Lets also test the far_threshold and the frr_threshold functions
threshold_far = far_threshold(sorted_negatives, [], t, is_sorted=True)
threshold_frr = frr_threshold([], sorted_positives, t, is_sorted=True)
# Check that the obtained FAR and FRR values are smaller than the requested ones
far = farfrr(negatives, positives, threshold_far)[0]
frr = farfrr(negatives, positives, threshold_frr)[1]
......@@ -151,7 +154,8 @@ def test_thresholding():
positives = bob.io.base.load(F('linsep-positives.hdf5'))
negatives = bob.io.base.load(F('linsep-negatives.hdf5'))
threshold = eer_threshold(negatives, positives)
# the result here is 3.2 (which is what we expect ;-)
assert threshold == 3.2
# Of course we have to make sure that we set the EER correctly:
ccp = count(correctly_classified_positives(positives,threshold))
......@@ -161,7 +165,7 @@ def test_thresholding():
# The second option for the calculation of the threshold is to use the
# minimum HTER.
threshold2 = min_hter_threshold(negatives, positives)
# the result here is 3.2 (which is what we expect ;-)
assert threshold2 == 3.2
nose.tools.eq_(threshold, threshold2) #in this particular case
# Of course we have to make sure that we set the EER correctly:
......@@ -172,7 +176,7 @@ def test_thresholding():
def test_plots():
from . import eer_threshold, roc, roc_for_far, precision_recall_curve, det, epc
# This test set is not separable.
positives = bob.io.base.load(F('nonsep-positives.hdf5'))
......@@ -186,6 +190,14 @@ def test_plots():
xyref = bob.io.base.load(F('nonsep-roc.hdf5'))
assert numpy.array_equal(xy, xyref)
# This example will test the ROC for FAR plot calculation functionality.
far = [0.01, 0.1, 1]
ref = [0.48, 0.22, 0]
xy = roc_for_far(negatives, positives, far)
# uncomment the next line to save a reference value
assert numpy.array_equal(xy[0], far)
assert numpy.array_equal(xy[1], ref)
# This example will test the Precision-Recall plot calculation functionality.
xy = precision_recall_curve(negatives, positives, 100)
# uncomment the next line to save a reference value
......
......@@ -252,7 +252,7 @@ autodoc_default_flags = ['members', 'undoc-members', 'inherited-members', 'show-
# For inter-documentation mapping:
from bob.extension.utils import link_documentation
intersphinx_mapping = link_documentation(['python', 'numpy', 'matplotlib'])
def setup(app):
......
......@@ -40,7 +40,7 @@ formula:
.. math::
HTER(\tau, \mathcal{D}) = \frac{FAR(\tau, \mathcal{D}) + FRR(\tau, \mathcal{D})}{2} \quad \textrm{[\%]}
where :math:`\mathcal{D}` denotes the dataset used. Since both the FAR and the
FRR depend on the threshold :math:`\tau`, they are strongly related to each
......@@ -60,7 +60,7 @@ scenarios, the threshold :math:`\tau` has to be set a priori: this is typically
done using a development set (also called cross-validation set). Nevertheless,
the optimal threshold can be different depending on the relative importance
given to the FAR and the FRR. Hence, in the EPC framework, the cost
:math:`\beta \in [0;1]` is defined as the trade-off between the FAR and FRR. The
optimal threshold :math:`\tau^*` is then computed using different values of
:math:`\beta`, corresponding to different operating points:
......@@ -77,14 +77,14 @@ defined in the first equation.
.. note::
Most of the methods available in this module require as input a set of 2
:py:class:`numpy.ndarray` objects that contain the scores obtained by the
classification system to be evaluated, in no specific order. Most of the
classes defined here deal with two-class problems. Therefore, in this
setting, and throughout this manual, we have defined that the **negatives**
represent the impostor attacks or false class accesses (that is, when a
sample of class A is given to the classifier of another class, such as class
B). The second set, referred to as the **positives**,
represents the true class accesses or signal response of the classifier. The
vectors are called this way because the procedures implemented in this module
expect the scores of the **negatives** to be statistically distributed to
......@@ -101,7 +101,7 @@ defined in the first equation.
parsers for formats we use the most. Please refer to the documentation of
:py:mod:`bob.measure.load` for a list of formats and details.
In the remainder of this section we assume you have successfully parsed and
loaded your scores in two 1D float64 vectors and are ready to evaluate the
performance of the classifier.
......@@ -113,18 +113,18 @@ the following techniques:
.. doctest::
>>> # negatives, positives = parse_my_scores(...) # write parser if not provided!
>>> T = 0.0 #Threshold: later we explain how one can calculate these
>>> correct_negatives = bob.measure.correctly_classified_negatives(negatives, T)
>>> FAR = 1 - (float(correct_negatives.sum())/negatives.size)
>>> correct_positives = bob.measure.correctly_classified_positives(positives, T)
>>> FRR = 1 - (float(correct_positives.sum())/positives.size)
We do provide a method to calculate the FAR and FRR in a single shot:
.. doctest::
>>> FAR, FRR = bob.measure.farfrr(negatives, positives, T)
The threshold ``T`` is normally calculated by looking at the distribution of
negatives and positives in a development (or validation) set, selecting a
......@@ -148,15 +148,29 @@ calculation of the threshold:
* Threshold for the minimum weighted error rate (MWER) given a certain cost
:math:`\beta`.
.. doctest:: python
>>> cost = 0.3 #or "beta"
>>> T = bob.measure.min_weighted_error_rate_threshold(negatives, positives, cost)
.. note::
Setting the cost to 0.5 is equivalent to using
:py:func:`bob.measure.min_hter_threshold`.
.. note::
Many functions in ``bob.measure`` have an ``is_sorted`` parameter, which defaults to ``False`` throughout.
However, these functions need the ``positive`` and/or ``negative`` scores to be sorted.
If the scores are not sorted in ascending order, they will be copied internally -- twice!
To avoid copying the scores, you might want to sort them in ascending order beforehand, e.g.:
.. doctest:: python
>>> negatives.sort()
>>> positives.sort()
>>> t = bob.measure.min_weighted_error_rate_threshold(negatives, positives, cost, is_sorted = True)
>>> assert T == t
Plotting
--------
......@@ -174,14 +188,14 @@ town. To plot an ROC curve, in possession of your **negatives** and
.. doctest::
>>> from matplotlib import pyplot
>>> # we assume you have your negatives and positives already split
>>> npoints = 100
>>> bob.measure.plot.roc(negatives, positives, npoints, color=(0,0,0), linestyle='-', label='test') # doctest: +SKIP
>>> pyplot.xlabel('FAR (%)') # doctest: +SKIP
>>> pyplot.ylabel('FRR (%)') # doctest: +SKIP
>>> pyplot.grid(True)
>>> pyplot.show() # doctest: +SKIP
You should see an image like the following one:
......
......@@ -10,11 +10,7 @@
Bob's Metric Routines
=======================
This module contains base functionality from Bob bound to Python, available in