From 75077dec964cee0e21e420f1f346e368fab46428 Mon Sep 17 00:00:00 2001
From: Theophile GENTILHOMME <tgentilhomme@jurasix08.idiap.ch>
Date: Fri, 16 Mar 2018 16:29:56 +0100
Subject: [PATCH] fix documentation

---
 bob/bio/base/score/__init__.py |   2 +-
 bob/bio/base/score/load.py     |  14 +++--
 bob/bio/base/tools/scoring.py  |   9 ++-
 doc/index.rst                  |   1 +
 doc/openbr.rst                 | 105 +++++++++++++++++++++++++++++++++
 doc/py_api.rst                 |  23 +++++++-
 6 files changed, 145 insertions(+), 9 deletions(-)
 create mode 100644 doc/openbr.rst

diff --git a/bob/bio/base/score/__init__.py b/bob/bio/base/score/__init__.py
index 0843f864..df3dd386 100644
--- a/bob/bio/base/score/__init__.py
+++ b/bob/bio/base/score/__init__.py
@@ -1,6 +1,6 @@
 from .load import (open_file, split, cmc, split_four_column, four_column,
                    split_five_column, five_column, scores, load_score,
-                   dump_score
+                   dump_score, cmc_four_column, cmc_five_column
                   )
 from .openbr import (write_matrix, write_score_file)
 
diff --git a/bob/bio/base/score/load.py b/bob/bio/base/score/load.py
index b7de295f..1390bcc6 100644
--- a/bob/bio/base/score/load.py
+++ b/bob/bio/base/score/load.py
@@ -268,7 +268,9 @@ def scores(filename, ncolumns=None):
   """scores(filename, ncolumns=None) -> tuple
 
   Loads the scores from the given score file and yield its lines.
-  Depending on the score file format, four or five elements are yielded, see :py:func:`bob.bio.base.score.four_column` and :py:func:`bob.bio.base.score.five_column` for details.
+  Depending on the score file format, four or five elements are yielded, see
+  :py:func:`bob.bio.base.score.load.four_column` and
+  :py:func:`bob.bio.base.score.load.five_column` for details.
 
   Parameters:
 
@@ -281,7 +283,8 @@ def scores(filename, ncolumns=None):
   Yields:
 
   tuple:
-    see :py:func:`bob.bio.base.score.four_column` or :py:func:`bob.bio.base.score.five_column`
+    see :py:func:`bob.bio.base.score.load.four_column` or
+    :py:func:`bob.bio.base.score.load.five_column`
   """
   return _iterate_score_file(filename)
 
@@ -291,7 +294,8 @@ def split(filename, ncolumns=None):
 
   Loads the scores from the given score file and splits them into positives and negatives.
 
-  Depending on the score file format, it calls see :py:func:`bob.bio.base.score.split_four_column` and `:py:func:`bob.bio.base.score.split_five_column` for details.
+  Depending on the score file format, it calls :py:func:`split_four_column`
+  or :py:func:`split_five_column`; see these functions for details.
 
   Parameters:
 
@@ -324,7 +328,9 @@ def cmc(filename, ncolumns=None):
 
   Loads scores to compute CMC curves.
 
-  Depending on the score file format, it calls see :py:func:`bob.bio.base.score.cmc_four_column` and `:py:func:`bob.bio.base.score.cmc_five_column` for details.
+  Depending on the score file format, it calls
+  :py:func:`bob.bio.base.score.load.cmc_four_column` or
+  :py:func:`bob.bio.base.score.load.cmc_five_column`; see these functions for details.
 
   Parameters:
 
diff --git a/bob/bio/base/tools/scoring.py b/bob/bio/base/tools/scoring.py
index da03215c..eb634b96 100644
--- a/bob/bio/base/tools/scoring.py
+++ b/bob/bio/base/tools/scoring.py
@@ -62,7 +62,7 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
 
 
 def _open_to_read(score_file):
-  """Checks for the existence of the normal and the compressed version of the file, and calls :py:func:`score.open_file` for the existing one."""
+  """Checks for the existence of the normal and the compressed version of the file, and calls :py:func:`bob.bio.base.score.open_file` for the existing one."""
   if not os.path.exists(score_file):
     score_file += '.tar.bz2'
     if not os.path.exists(score_file):
@@ -115,7 +115,8 @@ def _delete(score_file, write_compressed):
 
 
 def _save_scores(score_file, scores, probe_objects, client_id, write_compressed):
-  """Saves the scores of one model into a text file that can be interpreted by :py:func:`score.split_four_column`."""
+  """Saves the scores of one model into a text file that can be interpreted by
+  :py:func:`bob.bio.base.score.split_four_column`."""
   assert len(probe_objects) == scores.shape[1]
 
   # open file for writing
@@ -493,7 +494,9 @@ def _concat(score_files, output, write_compressed, model_ids):
 def concatenate(compute_zt_norm, groups = ['dev', 'eval'], write_compressed = False, add_model_id = False):
   """Concatenates all results into one (or two) score files per group.
 
-  Score files, which were generated per model, are concatenated into a single score file, which can be interpreter by :py:func:`score.split_four_column`.
+  Score files, which were generated per model, are concatenated into a single
+  score file, which can be interpreted by
+  :py:func:`bob.bio.base.score.load.split_four_column`.
   The score files are always re-computed, regardless if they exist or not.
 
   **Parameters:**
diff --git a/doc/index.rst b/doc/index.rst
index 59c610e4..2b966a5d 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -78,6 +78,7 @@ Users Guide
    filelist-guide
    more
    annotations
+   openbr
 
 
 Reference Manual
diff --git a/doc/openbr.rst b/doc/openbr.rst
new file mode 100644
index 00000000..348d3b77
--- /dev/null
+++ b/doc/openbr.rst
@@ -0,0 +1,105 @@
+
+.. _bob.bio.base.openbr:
+
+=====================
+Score file conversion
+=====================
+Sometimes, it is required to export the score files generated by Bob to a
+different format, e.g., to be able to generate a plot comparing Bob's systems
+with other systems.  In this package, we provide source code to convert between
+different types of score files.
+
+Bob to OpenBR
+-------------
+One of the supported formats is the matrix format that the National Institute
+of Standards and Technology (NIST) uses, and which is supported by OpenBR_.
+The scores are stored in two binary matrices, where the first matrix (usually
+with a ``.mtx`` filename extension) contains the raw scores, while a second
+mask matrix (extension ``.mask``) indicates which scores are positives and
+which are negatives.
+To convert from Bob's four column or five column score file to a pair of these
+matrices, you can use the :py:func:`bob.bio.base.score.openbr.write_matrix` function.
+In its simplest form, this function takes a score file
+``'five-column-score-file'`` and writes the pair ``'openbr.mtx', 'openbr.mask'``
+of OpenBR_ compatible files:
+
+.. code-block:: py
+
+    >>> bob.bio.base.score.openbr.write_matrix('five-column-score-file', 'openbr.mtx', 'openbr.mask', score_file_format = '5column')
+
+In this way, the score file will be parsed and the matrices will be written in
+the same order that is obtained from the score file.
+For most of the applications, this should be sufficient, but as the identity
+information is lost in the matrix files, no deeper analysis is possible anymore
+when just using the matrices.  To enforce an order of the models and probes
+inside the matrices, you can use the ``model_names`` and ``probe_names``
+parameters of :py:func:`bob.bio.base.score.openbr.write_matrix`:
+
+* The ``probe_names`` parameter lists the ``path`` elements stored in the score
+  files, which are the fourth column in a ``5column`` file, and the third
+  column in a ``4column`` file, see :py:func:`bob.bio.base.score.load.five_column` and
+  :py:func:`bob.bio.base.score.load.four_column`.
+
+* The ``model_names`` parameter is a bit more complicated.  In a ``5column``
+  format score file, the model names are defined by the second column of that
+  file, see :py:func:`bob.bio.base.score.load.five_column`.  In a ``4column`` format
+  score file, the model information is not contained, but only the client
+  information of the model.  Hence, for the ``4column`` format, the
+  ``model_names`` actually lists the client ids found in the first column, see
+  :py:func:`bob.bio.base.score.load.four_column`.
+
+.. warning::
+
+  The model information is lost, but required to write the matrix files.  In
+  the ``4column`` format, we use client ids instead of the model
+  information.  Hence, when several models exist per client, this function
+  will not work as expected.
+
+Additionally, there are fields in the matrix files, which define the gallery
+and probe list files that were used to generate the matrix.  These file names
+can be selected with the ``gallery_file_name`` and ``probe_file_name`` keyword
+parameters of :py:func:`bob.bio.base.score.openbr.write_matrix`.
+Finally, OpenBR defines a specific ``'search'`` score file format, which is
+designed to be used to compute CMC curves.  The score matrix contains a
+descendingly sorted and possibly truncated list of scores, i.e., for each
+probe, a sorted list of all scores for the models is generated.  To generate
+this special score file format, you can specify the ``search`` parameter.  It
+specifies the number of highest scores per probe that should be kept.  If
+``search`` is set to a negative value, all scores will be kept.  If ``search``
+is higher than the actual number of models, ``NaN`` scores will be appended,
+and the corresponding mask values will be set to ``0`` (i.e., to be ignored).
+
+OpenBR to Bob
+-------------
+On the other hand, you might also want to generate a Bob-compatible (four or
+five column) score file based on a pair of OpenBR matrix and mask files.  This
+is possible by using the :py:func:`bob.bio.base.score.openbr.write_score_file`
+function.  In its basic form, it takes the given pair of matrix and mask files,
+as well as the desired output score file:
+
+.. code-block:: py
+
+    >>> bob.bio.base.score.openbr.write_score_file('openbr.mtx', 'openbr.mask', 'four-column-score-file')
+
+This score file is sufficient to compute a CMC curve (see `bob.measure`);
+however, it does not contain relevant client ids or paths for models and probes.
+Particularly, it assumes that each client has exactly one associated model.
+To add or correct this information, you can use additional parameters to
+:py:func:`bob.bio.base.score.openbr.write_score_file`.  Client ids of models and
+probes can be added using the ``models_ids`` and ``probes_ids`` keyword
+arguments.  The length of these lists must be identical to the number of models
+and probes as given in the matrix files, **and they must be in the same order
+as used to compute the OpenBR matrix**.  This includes that the same
+same-client and different-client pairs as indicated by the OpenBR mask will be
+generated, which will be checked inside the function.
+To add model and probe path information, use the ``model_names`` and
+``probe_names`` parameters, which need to have the same size and order as the
+``models_ids`` and ``probes_ids``.  This information is simply stored in the
+score file, and no further check is applied.
+
+.. note:: The ``model_names`` parameter is used only when writing score files in ``score_file_format='5column'``; in the ``'4column'`` format, this parameter is ignored.
+
+
+.. Place your references here:
+.. _openbr: http://openbiometrics.org
+
diff --git a/doc/py_api.rst b/doc/py_api.rst
index c9841b2b..4d52687c 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -94,6 +94,26 @@ Scoring
    bob.bio.base.tools.concatenate
    bob.bio.base.tools.calibrate
 
+Loading data
+------------
+.. autosummary::
+   bob.bio.base.score.load.open_file
+   bob.bio.base.score.load.scores
+   bob.bio.base.score.load.split
+   bob.bio.base.score.load.cmc
+   bob.bio.base.score.load.four_column
+   bob.bio.base.score.load.split_four_column
+   bob.bio.base.score.load.cmc_four_column
+   bob.bio.base.score.load.five_column
+   bob.bio.base.score.load.split_five_column
+   bob.bio.base.score.load.cmc_five_column
+
+OpenBR conversions
+------------------
+.. autosummary::
+   bob.bio.base.score.openbr.write_matrix
+   bob.bio.base.score.openbr.write_score_file
+
 Details
 -------
 
@@ -108,5 +128,6 @@ Details
 
    .. autoclass:: FileSelector
 
-
+.. automodule:: bob.bio.base.score.load
+.. automodule:: bob.bio.base.score.openbr
 .. include:: links.rst
-- 
GitLab