From 75077dec964cee0e21e420f1f346e368fab46428 Mon Sep 17 00:00:00 2001 From: Theophile GENTILHOMME <tgentilhomme@jurasix08.idiap.ch> Date: Fri, 16 Mar 2018 16:29:56 +0100 Subject: [PATCH] fix documentation --- bob/bio/base/score/__init__.py | 2 +- bob/bio/base/score/load.py | 14 +++-- bob/bio/base/tools/scoring.py | 9 ++- doc/index.rst | 1 + doc/openbr.rst | 105 +++++++++++++++++++++++++++++++++ doc/py_api.rst | 23 +++++++- 6 files changed, 145 insertions(+), 9 deletions(-) create mode 100644 doc/openbr.rst diff --git a/bob/bio/base/score/__init__.py b/bob/bio/base/score/__init__.py index 0843f864..df3dd386 100644 --- a/bob/bio/base/score/__init__.py +++ b/bob/bio/base/score/__init__.py @@ -1,6 +1,6 @@ from .load import (open_file, split, cmc, split_four_column, four_column, split_five_column, five_column, scores, load_score, - dump_score + dump_score, cmc_four_column, cmc_five_column ) from .openbr import (write_matrix, write_score_file) diff --git a/bob/bio/base/score/load.py b/bob/bio/base/score/load.py index b7de295f..1390bcc6 100644 --- a/bob/bio/base/score/load.py +++ b/bob/bio/base/score/load.py @@ -268,7 +268,9 @@ def scores(filename, ncolumns=None): """scores(filename, ncolumns=None) -> tuple Loads the scores from the given score file and yield its lines. - Depending on the score file format, four or five elements are yielded, see :py:func:`bob.bio.base.score.four_column` and :py:func:`bob.bio.base.score.five_column` for details. + Depending on the score file format, four or five elements are yielded, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. 
Parameters: @@ -281,7 +283,8 @@ def scores(filename, ncolumns=None): Yields: tuple: - see :py:func:`bob.bio.base.score.four_column` or :py:func:`bob.bio.base.score.five_column` + see :py:func:`bob.bio.base.score.load.four_column` or + :py:func:`bob.bio.base.score.load.five_column` """ return _iterate_score_file(filename) @@ -291,7 +294,8 @@ def split(filename, ncolumns=None): Loads the scores from the given score file and splits them into positives and negatives. - Depending on the score file format, it calls see :py:func:`bob.bio.base.score.split_four_column` and `:py:func:`bob.bio.base.score.split_five_column` for details. + Depending on the score file format, it calls :py:func:`split_four_column` + or :py:func:`split_five_column`; see those functions for details. Parameters: @@ -324,7 +328,9 @@ def cmc(filename, ncolumns=None): Loads scores to compute CMC curves. - Depending on the score file format, it calls see :py:func:`bob.bio.base.score.cmc_four_column` and `:py:func:`bob.bio.base.score.cmc_five_column` for details. + Depending on the score file format, it calls + :py:func:`bob.bio.base.score.load.cmc_four_column` or + :py:func:`bob.bio.base.score.load.cmc_five_column`; see those functions for details. 
Parameters: diff --git a/bob/bio/base/tools/scoring.py b/bob/bio/base/tools/scoring.py index da03215c..eb634b96 100644 --- a/bob/bio/base/tools/scoring.py +++ b/bob/bio/base/tools/scoring.py @@ -62,7 +62,7 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files): def _open_to_read(score_file): - """Checks for the existence of the normal and the compressed version of the file, and calls :py:func:`score.open_file` for the existing one.""" + """Checks for the existence of the normal and the compressed version of the file, and calls :py:func:`bob.bio.base.score.open_file` for the existing one.""" if not os.path.exists(score_file): score_file += '.tar.bz2' if not os.path.exists(score_file): @@ -115,7 +115,8 @@ def _delete(score_file, write_compressed): def _save_scores(score_file, scores, probe_objects, client_id, write_compressed): - """Saves the scores of one model into a text file that can be interpreted by :py:func:`score.split_four_column`.""" + """Saves the scores of one model into a text file that can be interpreted by + :py:func:`bob.bio.base.score.split_four_column`.""" assert len(probe_objects) == scores.shape[1] # open file for writing @@ -493,7 +494,9 @@ def _concat(score_files, output, write_compressed, model_ids): def concatenate(compute_zt_norm, groups = ['dev', 'eval'], write_compressed = False, add_model_id = False): """Concatenates all results into one (or two) score files per group. - Score files, which were generated per model, are concatenated into a single score file, which can be interpreter by :py:func:`score.split_four_column`. + Score files, which were generated per model, are concatenated into a single + score file, which can be interpreted by + :py:func:`bob.bio.base.score.load.split_four_column`. The score files are always re-computed, regardless if they exist or not. 
**Parameters:** diff --git a/doc/index.rst b/doc/index.rst index 59c610e4..2b966a5d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -78,6 +78,7 @@ Users Guide filelist-guide more annotations + openbr Reference Manual diff --git a/doc/openbr.rst b/doc/openbr.rst new file mode 100644 index 00000000..348d3b77 --- /dev/null +++ b/doc/openbr.rst @@ -0,0 +1,105 @@ + +.. _bob.bio.base.openbr: + +===================== +Score file conversion +===================== +Sometimes, it is required to export the score files generated by Bob to a +different format, e.g., to be able to generate a plot comparing Bob's systems +with other systems. In this package, we provide source code to convert between +different types of score files. + +Bob to OpenBR +------------- +One of the supported formats is the matrix format that the National Institute +of Standards and Technology (NIST) uses, and which is supported by OpenBR_. +The scores are stored in two binary matrices, where the first matrix (usually +with a ``.mtx`` filename extension) contains the raw scores, while a second +mask matrix (extension ``.mask``) indicates which scores are +positives, and which are negatives. +To convert from Bob's four column or five column score file to a pair of these +matrices, you can use the :py:func:`bob.bio.base.score.openbr.write_matrix` function. +In the simplest way, this function takes a score file +``'five-column-score-file'`` and writes the pair ``'openbr.mtx', 'openbr.mask'`` +of OpenBR_ compatible files: + +.. code-block:: py + + >>> bob.bio.base.score.openbr.write_matrix('five-column-score-file', 'openbr.mtx', 'openbr.mask', score_file_format = '5column') + +In this way, the score file will be parsed and the matrices will be written in +the same order that is obtained from the score file. +For most of the applications, this should be sufficient, but as the identity +information is lost in the matrix files, no deeper analysis is possible anymore +when just using the matrices. 
To enforce an order of the models and probes +inside the matrices, you can use the ``model_names`` and ``probe_names`` +parameters of :py:func:`bob.bio.base.score.openbr.write_matrix`: + +* The ``probe_names`` parameter lists the ``path`` elements stored in the score + files, which are the fourth column in a ``5column`` file, and the third + column in a ``4column`` file, see :py:func:`bob.bio.base.score.load.five_column` and + :py:func:`bob.bio.base.score.load.four_column`. + +* The ``model_names`` parameter is a bit more complicated. In a ``5column`` + format score file, the model names are defined by the second column of that + file, see :py:func:`bob.bio.base.score.load.five_column`. In a ``4column`` format + score file, the model information is not contained, but only the client + information of the model. Hence, for the ``4column`` format, the + ``model_names`` actually lists the client ids found in the first column, see + :py:func:`bob.bio.base.score.load.four_column`. + +.. warning:: + + The model information is lost, but required to write the matrix files. In + the ``4column`` format, we use client ids instead of the model + information. Hence, when several models exist per client, this function + will not work as expected. + +Additionally, there are fields in the matrix files, which define the gallery +and probe list files that were used to generate the matrix. These file names +can be selected with the ``gallery_file_name`` and ``probe_file_name`` keyword +parameters of :py:func:`bob.bio.base.score.openbr.write_matrix`. +Finally, OpenBR defines a specific ``'search'`` score file format, which is +designed to be used to compute CMC curves. The score matrix contains a +descendingly sorted and possibly truncated list of scores, i.e., for each +probe, a sorted list of all scores for the models is generated. To generate +this special score file format, you can specify the ``search`` parameter. 
It +specifies the number of highest scores per probe that should be kept. If the +``search`` parameter is set to a negative value, all scores will be kept. If +the ``search`` parameter is higher than the actual number of models, ``NaN`` +scores will be appended, and the corresponding mask values will be set to ``0`` +(i.e., to be ignored). +OpenBR to Bob +------------- +On the other hand, you might also want to generate a Bob-compatible (four or +five column) score file based on a pair of OpenBR matrix and mask files. This +is possible by using the :py:func:`bob.bio.base.score.openbr.write_score_file` +function. In its basic form, it takes the given pair of matrix and mask files, as +well as the desired output score file: + +.. code-block:: py + + >>> bob.bio.base.score.openbr.write_score_file('openbr.mtx', 'openbr.mask', 'four-column-score-file') + +This score file is sufficient to compute a CMC curve (see `bob.measure`), however it +does not contain relevant client ids or paths for models and probes. +Particularly, it assumes that each client has exactly one associated model. +To add/correct this information, you can use additional parameters to +:py:func:`bob.bio.base.score.openbr.write_score_file`. Client ids of models and +probes can be added using the ``models_ids`` and ``probes_ids`` keyword +arguments. The length of these lists must be identical to the number of models +and probes as given in the matrix files, **and they must be in the same order +as used to compute the OpenBR matrix**. This implies that the same +same-client and different-client pairs as indicated by the OpenBR mask will be +generated, which will be checked inside the function. +To add model and probe path information, use the ``model_names`` and +``probe_names`` parameters, which need to have the same size and order as the +``models_ids`` and ``probes_ids``. This information is simply stored in the +score file, and no further check is applied. + +.. 
note:: The ``model_names`` parameter is used only when writing score files in ``score_file_format='5column'``; in the ``'4column'`` format, this parameter is ignored. + + +.. Place your references here: +.. _openbr: http://openbiometrics.org + diff --git a/doc/py_api.rst b/doc/py_api.rst index c9841b2b..4d52687c 100644 --- a/doc/py_api.rst +++ b/doc/py_api.rst @@ -94,6 +94,26 @@ Scoring bob.bio.base.tools.concatenate bob.bio.base.tools.calibrate +Loading data +------------ +.. autosummary:: + bob.bio.base.score.load.open_file + bob.bio.base.score.load.scores + bob.bio.base.score.load.split + bob.bio.base.score.load.cmc + bob.bio.base.score.load.four_column + bob.bio.base.score.load.split_four_column + bob.bio.base.score.load.cmc_four_column + bob.bio.base.score.load.five_column + bob.bio.base.score.load.split_five_column + bob.bio.base.score.load.cmc_five_column + +OpenBR conversions +------------------ +.. autosummary:: + bob.bio.base.score.openbr.write_matrix + bob.bio.base.score.openbr.write_score_file + Details ------- @@ -108,5 +128,6 @@ Details .. autoclass:: FileSelector - +.. automodule:: bob.bio.base.score.load +.. automodule:: bob.bio.base.score.openbr .. include:: links.rst -- GitLab