Commit e04e8f9c authored by André Anjos

Fix save_data -> write_data changes; Updated docs

parent 654b700f
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

import numpy
import scipy.signal

import bob.ip.base

from bob.bio.base.algorithm import Algorithm


class HammingDistance (Algorithm):
  """Finger vein matching: hamming distance"""

  def __init__(
      self,
      # some similarity functions might need a GaborWaveletTransform class, so
      # we have to provide the parameters here as well...
      ch = 8,       # maximum search displacement in y-direction
      cw = 5,       # maximum search displacement in x-direction
      gpu = False,  # use the GPU-based convolution implementation
      ):

    # call base class constructor
    Algorithm.__init__(
        self,
        ch = ch,
        cw = cw,
        multiple_model_scoring = None,
        multiple_probe_scoring = None,
        )

    self.ch = ch
    self.cw = cw
    self.gpu = gpu

  def enroll(self, enroll_features):
    """Enrolls the model by stacking all enrollment features into one array"""
    return numpy.vstack(enroll_features)

  def score(self, model, probe):
    """Computes the score between the probe and the model.

    Returns a value between 0 and 0.5; larger values indicate a better match.
    """
    I = probe.astype(numpy.float64)
    R = model.astype(numpy.float64)
    h, w = R.shape
    crop_R = R[self.ch:h-self.ch, self.cw:w-self.cw]

    # rotate the cropped model by 180 degrees, so that the convolution below
    # effectively computes the cross-correlation between probe and model
    rotate_R = numpy.zeros(crop_R.shape)
    bob.ip.base.rotate(crop_R, rotate_R, 180)

    if self.gpu:
      import xbob.cusp
      Nm = xbob.cusp.conv(I, rotate_R)
    else:
      Nm = scipy.signal.convolve2d(I, rotate_R, 'valid')

    # locate the correlation peak
    t0, s0 = numpy.unravel_index(Nm.argmax(), Nm.shape)
    Nmm = Nm[t0, s0]

    # normalize the peak by the total energy of the cropped model and of the
    # matching region of the probe
    return Nmm / (crop_R.sum() + I[t0:t0+h-2*self.ch, s0:s0+w-2*self.cw].sum())
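As a quick sanity check of how this class is wired together, here is a minimal, hypothetical usage sketch; the image sizes and random contents are made up for illustration only:

import numpy

# Hypothetical smoke test for the HammingDistance class above.
algorithm = HammingDistance(ch=8, cw=5)

# A fake binarized vein pattern standing in for a single enrollment feature.
enroll_feature = numpy.random.randint(0, 2, (64, 128))

# Enrollment stacks all enrollment features vertically into the model array.
model = algorithm.enroll([enroll_feature])

# Score a probe of the same size against the model; values lie between
# 0 and 0.5, and larger values indicate a better match.
probe = numpy.random.randint(0, 2, (64, 128))
print(algorithm.score(model, probe))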
@@ -123,13 +123,3 @@ class LocalBinaryPatterns (Extractor):
     finger_mask = image[1]
     return self.lbp_features(finger_image, finger_mask)
-
-  def save_feature(self, feature, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'w')
-    f.set('feature', feature)
-
-  def read_feature(self, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'r')
-    image = f.read('feature')
-    return image
@@ -267,13 +267,3 @@ class MaximumCurvature (Extractor):
     finger_mask = image[1]
     return self.maximum_curvature(finger_image, finger_mask)
-
-  def save_feature(self, feature, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'w')
-    f.set('feature', feature)
-
-  def read_feature(self, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'r')
-    image = f.read('feature')
-    return image
@@ -32,14 +32,3 @@ class NormalisedCrossCorrelation (Extractor):
     #TODO
     return image_vein.astype(numpy.float64)
-
-  def save_feature(self, feature, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'w')
-    f.set('feature', feature)
-
-  def read_feature(self, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'r')
-    image = f.read('feature')
-    return (image)
@@ -94,14 +94,3 @@ class PrincipalCurvature (Extractor):
     finger_mask = image[1]
     return self.principal_curvature(finger_image, finger_mask)
-
-  def save_feature(self, feature, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'w')
-    f.set('feature', feature)
-
-  def read_feature(self, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'r')
-    image = f.read('feature')
-    return (image)
@@ -234,13 +234,3 @@ class RepeatedLineTracking (Extractor):
     finger_mask = image[1]
     return self.repeated_line_tracking(finger_image, finger_mask)
-
-  def save_feature(self, feature, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'w')
-    f.set('feature', feature)
-
-  def read_feature(self, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'r')
-    image = f.read('feature')
-    return (image)
@@ -94,14 +94,3 @@ class WideLineDetector (Extractor):
     finger_mask = image[1]
     return self.wide_line_detector(finger_image, finger_mask)
-
-  def save_feature(self, feature, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'w')
-    f.set('feature', feature)
-
-  def read_feature(self, feature_file):
-    f = bob.io.base.HDF5File(feature_file, 'r')
-    image = f.read('feature')
-    return (image)
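All six hunks above delete the same HDF5 save/load boilerplate from the extractor classes, presumably because the ``bob.bio.base`` ``Extractor`` base class already provides equivalent default I/O (an assumption based on the commit context; the diff itself does not show the replacement). For reference, the removed pattern round-trips a feature array through HDF5 like this:

import numpy
import bob.io.base

feature = numpy.random.rand(10, 10)

# write the feature under the 'feature' key, as the removed save_feature did
f = bob.io.base.HDF5File('feature.hdf5', 'w')
f.set('feature', feature)
del f  # deleting the handle closes and flushes the file

# read it back, as the removed read_feature did
f = bob.io.base.HDF5File('feature.hdf5', 'r')
restored = f.read('feature')
assert numpy.allclose(feature, restored)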
@@ -6,6 +6,7 @@
 import numpy
 from PIL import Image
 import bob.io.base
+import bob.io.image
 import bob.ip.base
 import bob.sp
 import bob.core
@@ -505,7 +506,7 @@ class FingerCrop (Preprocessor):
     return self.crop_finger(image)

-  def save_data(self, image, image_file):
+  def write_data(self, image, image_file):
     f = bob.io.base.HDF5File(image_file, 'w')
     f.set('image', image[0])
     f.set('finger_mask', image[1])
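The rename from ``save_data`` to ``write_data`` matches the commit message and tracks the corresponding method name in the ``bob.bio.base`` ``Preprocessor`` API. The matching reader is not shown in this diff; a plausible counterpart, mirroring the ``'image'`` and ``'finger_mask'`` keys used above, could look like this (an assumption, not part of the commit):

def read_data(self, image_file):
  # hypothetical counterpart to write_data above
  f = bob.io.base.HDF5File(image_file, 'r')
  image = f.read('image')
  finger_mask = f.read('finger_mask')
  return (image, finger_mask)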
@@ -2,6 +2,8 @@
 .. Mon 11 Jul 2016 16:39:15 CEST

+.. _bob.bio.vein.api:
+
 ============
  Python API
 ============
@@ -3,12 +3,6 @@
 .. _bob.bio.vein.baselines:

-.. warning::
-
-   This document was copied from ``bob.bio.spear`` and is not updated. We're
-   working on it. Please don't use it as of now.
-
 ===============================
  Executing Baseline Algorithms
 ===============================
@@ -17,16 +11,36 @@ The first thing you might want to do is to execute one of the vein
 recognition algorithms that are implemented in ``bob.bio.vein``.

-Setting up your Database
-------------------------
+Setting up Databases
+--------------------

-For example, you can easily download the audio samples of the `PUT`_
-database.
-
-By default, ``bob.bio.vein`` does not know, where the original database files
-are located. Hence, before running experiments you have to specify the raw
-database directories. How this is done is explained in more detail in the
-:ref:`bob.bio.base.installation`.
+In order to run vein recognition algorithms using this package, you'll need to
+make sure to download the raw files corresponding to the databases you'd like
+to process. The raw files are not distributed with Bob_ software, as biometric
+data is, in most countries, considered sensitive data that cannot be obtained
+without explicit licensing from a data controller. You must visit the websites
+below, sign the license agreements and then download the data before trying to
+run the baselines.
+
+.. note::
+
+   If you're at the Idiap Research Institute in Switzerland, the datasets used
+   in the baselines mentioned in this guide are already downloaded and
+   pre-installed on our shared file system. You don't need to re-download the
+   databases or create a ``~/.bob_bio_databases.txt`` file.
+
+The current system readily supports the following freely available datasets:
+
+* ``vera``: `Vera Fingervein`_
+* ``utfvp``: `UTFVP`_
+* ``put``: `PUT`_ Vein Dataset
+
+After downloading the databases, annotate the base directories in which they
+are installed. Then, follow the instructions in
+:ref:`bob.bio.base.installation` to let this framework know where the databases
+are located on your system.
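The note above mentions a ``~/.bob_bio_databases.txt`` file. As a sketch, such a file maps database path variables to the local directories where you installed the raw data; the variable names below are illustrative assumptions, and the authoritative names are given in :ref:`bob.bio.base.installation`:

.. code-block:: text

   [YOUR_VERAFINGER_DIRECTORY] = /path/to/vera-fingervein
   [YOUR_UTFVP_DIRECTORY] = /path/to/utfvp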
 Running Baseline Experiments

@@ -41,205 +55,72 @@ just going to the console and typing:
 This script is explained in more detail in :ref:`bob.bio.base.experiments`.
-The ``./bin/verify.py --help`` option shows you, which other options you have.
-Here is an almost complete extract:
-
-* ``--database``: The database and protocol you want to use.
-* ``--algorithms``: The recognition algorithms that you want to execute.
-* ``--all``: Execute all algorithms that are implemented.
-* ``--temp-directory``: The directory where temporary files of the experiments
-  are put to.
-* ``--result-directory``: The directory where resulting score files of the
-  experiments are put to.
-* ``--evaluate``: After running the experiments, the resulting score files will
-  be evaluated, and the result is written to console.
-* ``--dry-run``: Instead of executing the algorithm (or the evaluation), only
-  print the command that would have been executed.
-* ``--verbose``: Increase the verbosity level of the script.
-  By default, only the commands that are executed are printed, and the rest of
-  the calculation runs quietly. You can increase the verbosity by adding the
-  ``--verbose`` parameter repeatedly (up to three times).
+The ``./bin/verify.py --help`` option shows you which other options you can
+set.

 Usually it is a good idea to have at least verbose level 2 (i.e., calling
-``./bin/verify.py --verbose --verbose``, or the short version
-``./bin/verify.py -vv``).
+``./bin/verify.py --verbose --verbose``, or the short version ``./bin/verify.py
+-vv``).
-Running in Parallel
-~~~~~~~~~~~~~~~~~~~
-
-To run the experiments in parallel, as usual, you can define an SGE grid
-configuration, or run with parallel threads on the local machine. For the
-``./bin/verify.py`` script, the grid configuration is adapted to each of the
-algorithms. Hence, to run in the SGE grid, you can simply add the ``--grid``
-command line option, without parameters. Similarly, to run the experiments in
-parallel on the local machine, simply add a ``--parallel <N>`` option, where
-``<N>`` specifies the number of parallel jobs you want to execute.
-
-The Algorithms
---------------
-
-The algorithms present a set of state-of-the-art vein recognition algorithms.
-Here is the list of short-cuts:
-
-* ``gmm``: *Gaussian Mixture Models* (GMM) `[Rey00]`.
-
-  - algorithm : :py:class:`bob.bio.gmm.algorithm.GMM`
-
-* ``isv``: As an extension of the GMM algorithm, *Inter-Session Variability* (ISV) modeling `[Vogt08]` is used to learn what variations in samples are introduced by identity changes and which not.
-
-  - algorithm : :py:class:`bob.bio.gmm.algorithm.ISV`
-
-* ``ivector``: Another extension of the GMM algorithm is *Total Variability* (TV) modeling `[Dehak11]` (aka. I-Vector), which tries to learn a subspace in the GMM super-vector space.
-
-  - algorithm : :py:class:`bob.bio.gmm.algorithm.IVector`
-
-.. note::
-
-   The ``ivector`` algorithm needs a lot of training data and fails on small databases such as the `Voxforge`_ database.
-
-Evaluation Results
-------------------
-
-To evaluate the results, one can use ``./bin/evaluate.py`` command. Several
-types of evaluation can be achieved, see :ref:`bob.bio.base.evaluate` for
-details. Particularly, here we can enable ROC curves, DET plots, CMC curves
-and the computation of EER/HTER or minDCF.
-
-Experiments on different databases
-----------------------------------
-
-To make you more familiar with the tool, we provide you examples of different
-toolchains applied on different databases: Voxforge, BANCA, TIMIT, MOBIO, and
-NIST SRE 2012.
-
-`Voxforge`_ is a free database used in free speech recognition engines. We
-randomly selected a small part of the english corpus (< 1GB). It is used as a
-toy example for our speaker recognition tool since experiment can be easily run
-on a local machine, and the results can be obtained in a reasonnable amount of
-time (< 2h).
-
-Unlike TIMIT and BANCA, this dataset is completely free of charge.
-
-More details about how to download the audio files used in our experiments, and
-how the data is split into Training, Development and Evaluation set can be
-found here::
-
-  https://pypi.python.org/pypi/bob.db.putvein
-
-One example of command line is::
-
-  $ bin/verify.py -d putvein -p energy-2gauss -e mfcc-60 -a gmm-voxforge -s ubm_gmm --groups {dev,eval}
+.. note:: **Running in Parallel**
+
+   To run the experiments in parallel, you can define an SGE grid or local host
+   (multi-processing) configurations as explained in
+   :ref:`running_in_parallel`.
+
+   In short, to run in the Idiap SGE grid, you can simply add the ``--grid``
+   command line option, without parameters. To run experiments in parallel on
+   the local machine, simply add a ``--parallel <N>`` option, where ``<N>``
+   specifies the number of parallel jobs you want to execute.
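As a concrete sketch of the two modes described in this note (the experiment options are placeholders; only ``--grid`` and ``--parallel`` come from the text above):

.. code-block:: sh

   # run on the SGE grid (assumed available at Idiap)
   $ ./bin/verify.py <experiment options> --grid

   # or run with 4 parallel processes on the local machine
   $ ./bin/verify.py <experiment options> --parallel 4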
-In this example, we used the following configuration:
-
-* Energy-based VAD,
-* (19 MFCC features + Energy) + First and second derivatives,
-* **UBM-GMM** Modelling (with 256 Gaussians), the scoring is done using the linear approximation of the LLR.
-
-The performance of the system on DEV and EVAL are:
-
-* ``DEV: EER = 1.89%``
-* ``EVAL: HTER = 1.56%``
-
-If you want to run the same experiment on SGE::
+In the remainder of this section we introduce baseline experiments you can
+readily run with this tool without further configuration.
+
+
+Repeated Line-Tracking with Miura Matching
+==========================================
+
+You can find the description of this method in the paper by Miura *et al.*
+[MNM04]_.
+
+To run the baseline on the `VERA fingervein`_ database, using the ``1vsall``
+protocol (1-fold cross-validation), do the following:
-
-  $ bin/verify.py -d voxforge -p energy-2gauss -e mfcc-60 -a gmm-voxforge -s ubm_gmm --groups {dev,eval} -g grid
-
-If you want to run the parallel implementation of the UBM on the SGE::
-
-  $ bin/verify_gmm.py -d voxforge -p energy-2gauss -e mfcc-60 -a gmm-voxforge -s ubm_gmm_sge --groups {dev,eval} -g grid
-
-If you want to run the parallel implementation of the UBM on your local machine::
-
-  $ bin/verify_gmm.py -d voxforge -p energy-2gauss -e mfcc-60 -a gmm-voxforge -s ubm_gmm_local --groups {dev,eval} -g local
-
-Another example is to use **ISV** toolchain instead of UBM-GMM::
-
-  $ bin/verify.py -d voxforge -p energy-2gauss -e mfcc-60 -a isv-voxforge -s isv --groups {dev,eval} -g grid
-
-* ``DEV: EER = 1.41%``
-* ``EVAL: HTER = 1.52%``
-
-One can also try **JFA** toolchain::
-
-  $ bin/verify.py -d voxforge -p energy-2gauss -e mfcc-60 -a jfa-voxforge -s jfa --groups {dev,eval} -g grid
-
-* ``DEV: EER = 4.04%``
-* ``EVAL: HTER = 5.11%``
-
-or also **IVector** toolchain where **Whitening, L-Norm, LDA, WCCN** are used like in this example where the score computation is done using **Cosine distance**::
-
-  $ bin/verify.py -d voxforge -p energy-2gauss -e mfcc-60 -a ivec-cosine-voxforge -s ivec-cosine --groups {dev,eval} -g grid
-
-* ``DEV: EER = 7.33%``
-* ``EVAL: HTER = 13.80%``
-
-The scoring computation can also be done using **PLDA**::
-
-  $ bin/verify.py -d voxforge -p energy-2gauss -e mfcc-60 -a ivec-plda-voxforge -s ivec-plda --groups {dev,eval} -g grid
-
-* ``DEV: EER = 11.33%``
-* ``EVAL: HTER = 13.15%``
-
-Note that in the previous examples, our goal is not to optimize the parameters on the DEV set but to provide examples of use.
-
-2. BANCA dataset
-~~~~~~~~~~~~~~~~
-
-`BANCA`_ is a simple bimodal database with relatively clean data. The results are already very good with a simple baseline UBM-GMM system. An example of use can be::
-
-  $ bin/verify.py -vv -d banca-audio -p energy-2gauss -e mfcc-60 -a gmm-banca -s banca_G --groups {dev,eval}
-
-The configuration in this example is similar to the previous one with the only difference of using the regular LLR instead of its linear approximation.
-
-Here is the performance of this system:
-
-* ``DEV: EER = 0.91%``
-* ``EVAL: EER = 0.75%``
-
-3. TIMIT dataset
-~~~~~~~~~~~~~~~~
-
-`TIMIT`_ is one of the oldest databases (year 1993) used to evaluate speaker recognition systems. In the following example, the processing is done on the development set, and LFCC features are used::
-
-  $ bin/verify.py -vv -d timit -p energy-2gauss -e lfcc-60 -a gmm-timit -s timit
-
-Here is the performance of the system on the Development set:
-
-* ``DEV: EER = 2.68%``
-
-MOBIO dataset
-~~~~~~~~~~~~~
-
-This is a more challenging database. The noise and the short duration of the segments make the task of speaker recognition relatively difficult. The following experiment on male group (Mobile-0) uses the 4Hz modulation energy based VAD, and the ISV (with dimU=50) modelling technique::
-
-  $ bin/verify_isv.py -vv -d mobio-audio-male -p mod-4hz -e mfcc-60 -a isv-mobio -s isv --groups {dev,eval} -g demanding
+.. code-block:: sh
+
+   ./bin/verify.py --database=vera --protocol=1vsAll --preprocessor=none --extractor=repeatedlinetracking --algorithm=match-rlt --sub-directory="vera-1vsall-mnm04" --verbose --verbose
+
+This command line selects the following implementations for the toolchain:
+
+* Database: Use the base Bob API for the VERA database implementation,
+  protocol variant ``1vsAll``, which corresponds to the 1-fold
+  cross-validation evaluation protocol described in [TVM14]_
+* Preprocessor: Simple finger cropping, with no extra pre-processing
+* Feature extractor: Repeated line tracking, as explained in [MNM04]_
+* Matching algorithm: "Miura" matching, as explained in the same paper
+
+As the tool runs, you'll see printouts that show how it advances through
+preprocessing, feature extraction and matching.
-Here is the performance of this system:
-
-* ``DEV: EER = 13.81%``
-* ``EVAL: HTER = 10.90%``
-
-To generate the results presented in the ICASSP 2014 paper, please check the script included in the `icassp` folder of the toolbox.
-
-Note that the MOBIO dataset has different protocols, and that are all implemented in `bob.db.mobio`_. But in this toolbox, we provide separately mobile-0 protocol (into filelist format) for simplicity.
-
-NIST SRE 2012
-~~~~~~~~~~~~~
-
-We first invite you to read the paper describing our system submitted to the NIST SRE 2012 Evaluation. The protocols on the development set are the results of a joint work by the I4U group. To reproduce the results, please check this dedicated package::
-
-  https://pypi.python.org/pypi/spear.nist_sre12
-
-.. note::
-
-   For any additional information, please use our mailing list::
-
-   https://groups.google.com/forum/#!forum/bob-devel
+
+Available Resources
+-------------------
+
+This package provides various different ``bob.bio.base`` resource
+configurations to handle a variety of techniques in vein recognition: database
+adaptors, preprocessors (cropping and illumination normalization), feature
+extractors and matching algorithms. In order to list each contribution, use
+the script ``./bin/resources.py``.
+
+For available resources:
+
+.. code-block:: sh
+
+   $ ./bin/resources.py --packages=bob.bio.vein
+
+For a detailed explanation and the configurability of each resource, consult
+:ref:`bob.bio.vein.api`.

 .. include:: links.rst
@@ -18,3 +18,7 @@
 .. _virtualbox: https://www.virtualbox.org
 .. _hdf5: http://www.hdfgroup.org/HDF5
 .. _bob.bio.base: https://pypi.python.org/pypi/bob.bio.base
+.. _vera fingervein: https://www.idiap.ch/dataset/vera-fingervein
+.. _vera palmvein: https://www.idiap.ch/dataset/vera-palmvein
+.. _utfvp: http://scs.ewi.utwente.nl/downloads/show,Finger%20Vein/
+.. _put: http://biometrics.put.poznan.pl/vein-dataset/
@@ -20,3 +20,5 @@
 .. [HDLTL10] *B. Huang, Y. Dai, R. Li, D. Tang and W. Li*, **Finger-vein authentication based on wide line detector and pattern normalization**, Proceedings on 20th International Conference on Pattern Recognition (ICPR), 2010.
 .. [MD13] *L. Mirmohamadsadeghi and A. Drygajlo*, **Palm vein recognition using local texture patterns**, IET Biometrics, pp. 1-9, 2013.
+
+.. [TVM14] *Pedro Tome, Matthias Vanoni and Sébastien Marcel*, **On the Vulnerability of Finger Vein Recognition to Spoofing**, in: IEEE International Conference of the Biometrics Special Interest Group (BIOSIG), Darmstadt, Germany, pages 1-10, IEEE, 2014.