Commit 90564357 authored by Manuel Günther's avatar Manuel Günther
Browse files

Improved and uniformized the documentation

parent 1f581e3f
......@@ -13,7 +13,9 @@ from . import openbr
import numpy
def mse (estimation, target):
"""Calculates the mean square error between a set of outputs and target
"""mse(estimation, target) -> error
Calculates the mean square error between a set of outputs and target
values using the following formula:
.. math::
......@@ -28,7 +30,9 @@ def mse (estimation, target):
return numpy.mean((estimation - target)**2, 0)
def rmse (estimation, target):
"""Calculates the root mean square error between a set of outputs and target
"""rmse(estimation, target) -> error
Calculates the root mean square error between a set of outputs and target
values using the following formula:
.. math::
......@@ -43,12 +47,14 @@ def rmse (estimation, target):
return numpy.sqrt(mse(estimation, target))
def relevance (input, machine):
"""Calculates the relevance of every input feature to the estimation process
"""relevance (input, machine) -> relevances
Calculates the relevance of every input feature to the estimation process
using the following definition from:
Neural Triggering System Operating on High Resolution Calorimetry
Information, Anjos et al, April 2006, Nuclear Instruments and Methods in
Physics Research, volume 559, pages 134-138
Neural Triggering System Operating on High Resolution Calorimetry
Information, Anjos et al, April 2006, Nuclear Instruments and Methods in
Physics Research, volume 559, pages 134-138
.. math::
......@@ -73,10 +79,12 @@ def relevance (input, machine):
return retval
def recognition_rate(cmc_scores):
"""Calculates the recognition rate from the given input, which is identical
"""recognition_rate(cmc_scores) -> RR
Calculates the recognition rate from the given input, which is identical
to the rank 1 (C)MC value.
The input has a specific format, which is a list of two-element tuples. Each
The input has a specific format, which is a list of two-element tuples. Each
of the tuples contains the negative and the positive scores for one test
item. To read the lists from score files in 4 or 5 column format, please use
the :py:func:`bob.measure.load.cmc_four_column` or
......@@ -86,10 +94,20 @@ def recognition_rate(cmc_scores):
positive score is greater than or equal to all negative scores, divided by
the number of all test items. If several positive scores for one test item
exist, the **highest** score is taken.
**Parameters:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
**Returns:**
``RR`` : float
The rank 1 recognition rate, i.e., the relative number of correctly identified identities
"""
# If no scores are given, the recognition rate is exactly 0.
if not cmc_scores:
return 0
return 0.
correct = 0.
for neg, pos in cmc_scores:
......@@ -98,15 +116,17 @@ def recognition_rate(cmc_scores):
max_pos = numpy.max(pos)
# check if the highest positive score is greater than all negative scores
if (neg < max_pos).all():
correct += 1
correct += 1.
# return relative number of correctly matched scores
return correct / float(len(cmc_scores))
def cmc(cmc_scores):
"""Calculates the cumulative match characteristic (CMC) from the given input.
"""cmc(cmc_scores) -> curve
Calculates the cumulative match characteristic (CMC) from the given input.
The input has a specific format, which is a list of two-element tuples. Each
The input has a specific format, which is a list of two-element tuples. Each
of the tuples contains the negative and the positive scores for one test
item. To read the lists from score files in 4 or 5 column format, please use
the :py:func:`bob.measure.load.cmc_four_column` or
......@@ -117,6 +137,16 @@ def cmc(cmc_scores):
higher than the positive score. If several positive scores for one test item
exist, the **highest** positive score is taken. The CMC finally computes how
many test items have rank r or higher.
**Parameters:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
**Returns:**
``curve`` : array_like(2D, float)
The CMC curve, with the Rank in the first column and the number of correctly classified clients (in this rank) in the second column.
"""
# If no scores are given, we cannot plot anything
......@@ -147,6 +177,7 @@ def cmc(cmc_scores):
def get_config():
"""Returns a string containing the configuration information.
"""
import bob.extension
return bob.extension.get_config(__name__, version.externals)
......
......@@ -11,7 +11,20 @@ import math
import numpy
def cllr(negatives, positives):
"""Computes the 'cost of log likelihood ratio' measure as given in the bosaris toolkit"""
"""cllr(negatives, positives) -> cllr
Computes the 'cost of log likelihood ratio' (:math:`C_{llr}`) measure as given in the Bosaris toolkit
**Parameters:**
``negatives, positives`` : array_like(1D, float)
The scores computed by comparing elements from different classes and the same class, respectively.
**Returns:**
``cllr`` : float
The computed :math:`C_{llr}` value.
"""
sum_pos, sum_neg = 0., 0.
for pos in positives:
sum_pos += math.log(1. + math.exp(-pos), 2.)
......@@ -21,7 +34,20 @@ def cllr(negatives, positives):
def min_cllr(negatives, positives):
"""Computes the 'minimum cost of log likelihood ratio' measure as given in the bosaris toolkit"""
"""min_cllr(negatives, positives) -> min_cllr
Computes the 'minimum cost of log likelihood ratio' (:math:`C_{llr}^{min}`) measure as given in the Bosaris toolkit
**Parameters:**
``negatives, positives`` : array_like(1D, float)
The scores computed by comparing elements from different classes and the same class, respectively.
**Returns:**
``min_cllr`` : float
The computed :math:`C_{llr}^{min}` value.
"""
from bob.math import pavx
......
......@@ -11,16 +11,20 @@ import tarfile
import os
def open_file(filename):
"""Opens the given score file for reading.
"""open_file(filename) -> file_like
Opens the given score file for reading.
Score files might be raw text files, or a tar-file including a single score file inside.
Parameters:
**Parameters:**
filename : str or file-like
``filename`` : str or file-like
The name of the score file to open, or a file-like object open for reading.
If a file name is given, the according file might be a raw text file or a (compressed) tar file containing a raw text file.
Returns:
**Returns:**
``file_like`` : file-like
A read-only file-like object as it would be returned by open().
"""
if not isinstance(filename, str) and hasattr(filename, 'read'):
......@@ -47,20 +51,32 @@ def open_file(filename):
def four_column(filename):
"""Loads a score set from a single file to memory.
"""four_column(filename) -> claimed_id, real_id, test_label, score
Loads a score set from a single file and yields its lines (to avoid loading the whole score file into memory at once).
This function verifies that all fields are correctly placed and contain valid fields.
The score file must contain the following information in each line:
claimed_id real_id test_label score
**Parameters:**
``filename`` : str or file-like
The file object that will be opened with :py:func:`open_file` containing the scores.
Verifies that all fields are correctly placed and contain valid fields.
**Yields:**
Returns a python generator of tuples containing the following fields:
``claimed_id`` : str
The claimed identity -- the client name of the model that was used in the comparison
[0]
claimed identity (string)
[1]
real identity (string)
[2]
test label (string)
[3]
score (float)
``real_id`` : str
The real identity -- the client name of the probe that was used in the comparison
``test_label`` : str
A label of the probe -- usually the probe file name, or the probe id
``score`` : float
The result of the comparison of the model and the probe
"""
for i, l in enumerate(open_file(filename)):
......@@ -78,17 +94,28 @@ def four_column(filename):
def split_four_column(filename):
"""Loads a score set from a single file to memory and splits the scores
between positives and negatives. The score file has to respect the 4 column
format as defined in the method four_column().
"""split_four_column(filename) -> negatives, positives
Loads a score set from a single file and splits the scores
between negatives and positives. The score file has to respect the 4 column
format as defined in the method :py:func:`four_column`.
This method avoids loading and allocating memory for the strings present in
the file. We only keep the scores.
Returns a python tuple (negatives, positives). The values are 1-D blitz
arrays of float64.
"""
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``negatives`` : array_like(1D, float)
The list of ``score``'s, for which the ``claimed_id`` and the ``real_id`` differed (see :py:func:`four_column`).
``positives`` : array_like(1D, float)
The list of ``score``'s, for which the ``claimed_id`` and the ``real_id`` are identical (see :py:func:`four_column`).
"""
# split in positives and negatives
neg = []
pos = []
......@@ -102,15 +129,26 @@ def split_four_column(filename):
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
def cmc_four_column(filename):
"""Loads scores to compute CMC curves from a file in four column format.
The four column file needs to be in the same format as described in the four_column function,
and the "test label" (column 3) has to contain the test/probe file name.
"""cmc_four_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in four column format.
The four column file needs to be in the same format as described in :py:func:`four_column`,
and the ``test_label`` (column 3) has to contain the test/probe file name or a probe id.
This function returns a list of tuples.
For each probe file, the tuple consists of a list of negative scores and a list of positive scores.
Usually, the list of positive scores should contain only one element, but more are allowed.
The result of this function can directly be passed to, e.g., the :py:func:`bob.measure.cmc` function.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
"""
# extract positives and negatives
pos_dict = {}
......@@ -149,22 +187,35 @@ def cmc_four_column(filename):
return retval
def five_column(filename):
"""Loads a score set from a single file to memory.
Verifies that all fields are correctly placed and contain valid fields.
Returns a python generator of tuples containing the following fields:
[0]
claimed identity (string)
[1]
model label (string)
[2]
real identity (string)
[3]
test label (string)
[4]
score (float)
"""five_column(filename) -> claimed_id, model_label, real_id, test_label, score
Loads a score set from a single file and yields its lines (to avoid loading the whole score file into memory at once).
This function verifies that all fields are correctly placed and contain valid fields.
The score file must contain the following information in each line:
claimed_id model_label real_id test_label score
**Parameters:**
``filename`` : str or file-like
The file object that will be opened with :py:func:`open_file` containing the scores.
**Yields:**
``claimed_id`` : str
The claimed identity -- the client name of the model that was used in the comparison
``model_label`` : str
A label for the model -- usually the model file name, or the model id
``real_id`` : str
The real identity -- the client name of the probe that was used in the comparison
``test_label`` : str
A label of the probe -- usually the probe file name, or the probe id
``score`` : float
The result of the comparison of the model and the probe.
"""
for i, l in enumerate(open_file(filename)):
......@@ -181,15 +232,27 @@ def five_column(filename):
yield (field[0], field[1], field[2], field[3], score)
def split_five_column(filename):
"""Loads a score set from a single file to memory and splits the scores
between positives and negatives. The score file has to respect the 5 column
format as defined in the method five_column().
"""split_five_column(filename) -> negatives, positives
Loads a score set from a single file in five column format and splits the scores
between negatives and positives. The score file has to respect the 5 column
format as defined in the method :py:func:`five_column`.
This method avoids loading and allocating memory for the strings present in
the file. We only keep the scores.
Returns a python tuple (negatives, positives). The values are 1-D blitz
arrays of float64.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``negatives`` : array_like(1D, float)
The list of ``score``'s, for which the ``claimed_id`` and the ``real_id`` differed (see :py:func:`five_column`).
``positives`` : array_like(1D, float)
The list of ``score``'s, for which the ``claimed_id`` and the ``real_id`` are identical (see :py:func:`five_column`).
"""
# split in positives and negatives
......@@ -205,15 +268,26 @@ def split_five_column(filename):
return (numpy.array(neg, numpy.float64), numpy.array(pos, numpy.float64))
def cmc_five_column(filename):
"""Loads scores to compute CMC curves from a file in five column format.
The four column file needs to be in the same format as described in the five_column function,
and the "test label" (column 4) has to contain the test/probe file name.
"""cmc_five_column(filename) -> cmc_scores
Loads scores to compute CMC curves from a file in five column format.
The five column file needs to be in the same format as described in :py:func:`five_column`,
and the ``test_label`` (column 4) has to contain the test/probe file name or a probe id.
This function returns a list of tuples.
For each probe file, the tuple consists of a list of negative scores and a list of positive scores.
Usually, the list of positive scores should contain only one element, but more are allowed.
The result of this function can directly be passed to, e.g., the :py:func:`bob.measure.cmc` function.
**Parameters:**
``filename`` : str or file-like
The file that will be opened with :py:func:`open_file` containing the scores.
**Returns:**
``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database
"""
# extract positives and negatives
pos_dict = {}
......
......@@ -25,23 +25,23 @@ def write_matrix(
If ``search`` is given (as an integer), the resulting matrix files will be in the *search* format, keeping the given number of gallery scores with the highest values for each probe.
.. warning::
When provided with a 4-column score file, this function will work only, if there is only a single model id for each client.
When provided with a 4-column score file, this function will only work if there is a single model id for each client.
Keyword parameters:
**Parameters:**
score_file : str
``score_file`` : str
The 4 or 5 column style score file written by bob.
matrix_file : str
``matrix_file`` : str
The OpenBR matrix file that should be written.
Usually, the file name extension is ``.mtx``
mask_file : str
``mask_file`` : str
The OpenBR mask file that should be written.
The mask file defines, which values are positives, negatives or to be ignored.
Usually, the file name extension is ``.mask``
model_names : [str] or ``None``
``model_names`` : [str] or ``None``
If given, the matrix will be written in the same order as the given model names.
The model names must be identical with the second column in the 5-column ``score_file``.
......@@ -51,21 +51,21 @@ def write_matrix(
Only the scores of the given models will be considered.
probe_names : [str] or ``None``
``probe_names`` : [str] or ``None``
If given, the matrix will be written in the same order as the given probe names (the ``path`` of the probe).
The probe names are identical to the third column of the 4-column (or the fourth column of the 5-column) ``score_file``.
Only the scores of the given probe names will be considered in this case.
score_file_format : one of ``('4column', '5column')``
The format, in which the ``score_file`` is.
``score_file_format`` : one of ``('4column', '5column')``
The format, in which the ``score_file`` is; defaults to ``'4column'``
gallery_file_name : str
``gallery_file_name`` : str
The name of the gallery file that will be written in the header of the OpenBR files.
probe_file_name : str
``probe_file_name`` : str
The name of the probe file that will be written in the header of the OpenBR files.
search : int or ``None``
``search`` : int or ``None``
If given, the scores will be sorted per probe, keeping the specified number of highest scores.
If the given number is higher than the models, ``NaN`` values will be added, and the mask will contain ``0x00`` values.
"""
......@@ -119,8 +119,8 @@ def write_matrix(
for line in read_function(score_file):
client, model, id, probe, score = line[0], line[offset], line[1+offset], line[2+offset], line[3+offset]
assert model in model_dict
assert probe in probe_dict
assert model in model_dict, "model " + model + " unknown"
assert probe in probe_dict, "probe " + probe + " unknown"
model_index = model_dict[model]
probe_index = probe_dict[probe]
......@@ -191,46 +191,46 @@ def write_score_file(
This is, what OpenBR is doing as well.
However, you can also set ``replace_nan`` to any value, which will be written instead of the NaN values.
Keyword parameters:
**Parameters:**
matrix_file : str
``matrix_file`` : str
The OpenBR matrix file that should be read.
Usually, the file name extension is ``.mtx``
mask_file : str
``mask_file`` : str
The OpenBR mask file that should be read.
Usually, the file name extension is ``.mask``
score_file : str
``score_file`` : str
The 4 or 5 column style score file that should be written.
models_ids : [str] or ``None``
``models_ids`` : [str] or ``None``
The client ids of the models that will be written in the first column of the score file.
If given, the size must be identical to the number of models (gallery templates) in the OpenBR files.
If not given, client ids of the model will be identical to the **gallery index** in the matrix file.
probes_ids : [str] or ``None``:
``probes_ids`` : [str] or ``None``
The client ids of the probes that will be written in the second/third column of the four/five column score file.
If given, the size must be identical to the number of probe templates in the OpenBR files.
It will be checked that the OpenBR mask fits to the model/probe client ids.
If not given, the probe ids will be estimated automatically, i.e., to fit the OpenBR matrix.
model_names : [str] or ``None``
``model_names`` : [str] or ``None``
A list of model paths written in the second column of the five column score file.
If not given, the model index in the OpenBR file will be used.
.. note::
This entry is ignored in the four column score file format.
probe_names : [str] or ``None``
``probe_names`` : [str] or ``None``
A list of probe paths to be written in the third/fourth column in the four/five column score file.
If given, the size must be identical to the number of probe templates in the OpenBR files.
If not given, the probe index in the OpenBR file will be used.
score_file_format : one of ``('4column', '5column')``
The format, in which the ``score_file`` should be written.
``score_file_format`` : one of ``('4column', '5column')``
The format, in which the ``score_file`` should be written; defaults to ``'4column'``.
replace_nan : float or ``None``:
``replace_nan`` : float or ``None``
If NaN values are encountered in the OpenBR matrix (which are not ignored due to the mask being non-NULL), this value will be written instead.
If ``None``, the values will not be written in the score file at all.
"""
......@@ -301,7 +301,6 @@ def write_score_file(
if probe_names is None:
probe_names = [str(p+1) for p in range(mask.shape[0])]
# iterate through the files and write scores
with open(score_file, 'w') as f:
for g in range(mask.shape[1]):
......
......@@ -8,108 +8,84 @@
def roc(negatives, positives, npoints=100, CAR=False, **kwargs):
"""Plots Receiver Operating Characteristic (ROC) curve.
This method will call matplotlib to plot the ROC curve for a system which
This method will call ``matplotlib`` to plot the ROC curve for a system which
contains a particular set of negatives (impostors) and positives (clients)
scores. We use the standard matplotlib.pyplot.plot() command. All parameters
passed with exeception of the three first parameters of this method will be
directly passed to the plot command. If you wish to understand your options,
look here:
scores. We use the standard :py:func:`matplotlib.pyplot.plot` command. All parameters
passed, with the exception of the first four parameters of this method, will be
directly passed to the plot command.
http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.plot
The plot will represent the false-alarm on the horizontal axis and the false-rejection on the vertical axis.
The values for the axis will be computed using :py:func:`bob.measure.roc`.
The plot will represent the false-alarm on the vertical axis and the
false-rejection on the horizontal axis.
.. note::
Input arguments:
This function does not initiate and save the figure instance, it only
issues the plotting command. You are responsible for setting up and
saving the figure as you see fit.
negatives
a blitz array of negative class scores in float64 format
**Parameters:**
positives
a blitz array of positive class scores in float64 format
``negatives, positives`` : array_like(1D, float)
The list of negative and positive scores forwarded to :py:func:`bob.measure.roc`
npoints
number of points to use when drawing the ROC curve
``npoints`` : int
The number of points forwarded to :py:func:`bob.measure.roc`
CAR
plot CAR over FAR in semilogx (CAR=True) or FAR over FRR linearly (CAR=False, the default)
``CAR`` : bool
If set to ``True``, it will plot the CAR over FAR using :py:func:`matplotlib.pyplot.semilogx`, otherwise the FAR over FRR linearly using :py:func:`matplotlib.pyplot.plot`.
kwargs
a dictionary of extra plotting parameters, that is passed directly to
matplotlib.pyplot.plot().
``kwargs`` : keyword arguments
Extra plotting parameters, which are passed directly to :py:func:`matplotlib.pyplot.plot`.
.. note::
**Returns:**
This function does not initiate and save the figure instance, it only
issues the plotting command. You are the responsible for setting up and
saving the figure as you see fit.
Return value is the matplotlib line that was added as defined by the
matplotlib.pyplot.plot() command.
The return value is the matplotlib line that was added as defined by :py:func:`matplotlib.pyplot.plot` or :py:func:`matplotlib.pyplot.semilogx`.
"""
try:
import matplotlib.pyplot as mpl
except ImportError:
print("Cannot import matplotlib. This package is not essential, but required if you wish to use the plotting functionality.")
raise
from matplotlib import pyplot
from . import roc as calc
out = calc(negatives, positives, npoints)
if not CAR:
return mpl.plot(100.0*out[0,:], 100.0*out[1,:], **kwargs)
return pyplot.plot(100.0*out[0,:], 100.0*out[1,:], **kwargs)
else:
return mpl.semilogx(100.0*out[0,:], 100.0*(1-out[1,:]), **kwargs)
return pyplot.semilogx(100.0*out[0,:], 100.0*(1-out[1,:]), **kwargs)
def precision_recall_curve(negatives, positives, npoints=100, **kwargs):
"""Plots Precision-Recall curve.
This method will call matplotlib to plot the precision-recall curve for a system which
contains a particular set of negatives (impostors) and positives (clients)
scores. We use the standard matplotlib.pyplot.plot() command. All parameters
passed with exeception of the three first parameters of this method will be
directly passed to the plot command. If you wish to understand your options,
look here:
http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.plot
The plot will represent the false-alarm on the vertical axis and the
false-rejection on the horizontal axis.
This method will call ``matplotlib`` to plot the precision-recall curve for a system which
contains a particular set of ``negatives`` (impostors) and ``positives`` (clients)
scores. We use the standard :py:func:`matplotlib.pyplot.plot` command. All parameters