diff --git a/bob/bio/vein/algorithm/MiuraMatch.py b/bob/bio/vein/algorithm/MiuraMatch.py
index 86e02632e3c241b007895e2bcd9f21c656edf909..b2cc6549317da236fe44dfd348c829db62c8508c 100644
--- a/bob/bio/vein/algorithm/MiuraMatch.py
+++ b/bob/bio/vein/algorithm/MiuraMatch.py
@@ -4,7 +4,6 @@
 import numpy
 import scipy.signal
 
-import bob.ip.base
 from bob.bio.base.algorithm import Algorithm
 
 
@@ -96,20 +95,25 @@ class MiuraMatch (Algorithm):
 
     scores = []
 
-    for i in range(n_models):
+    # iterate over all models for a given individual
+    for md in model:
 
       # erode model by (ch, cw)
-      R=model[i,:].astype(numpy.float64)
+      R = md.astype(numpy.float64)
       h, w = R.shape
       crop_R = R[self.ch:h-self.ch, self.cw:w-self.cw]
 
-      # rotate input image
-      rotate_R = numpy.zeros((crop_R.shape[0], crop_R.shape[1]))
-      bob.ip.base.rotate(crop_R, rotate_R, 180)
-
-      # convolve model and probe using FFT/IFFT.
-      #Nm = utils.convfft(I, rotate_R) #drop-in replacement for scipy method
-      Nm = scipy.signal.convolve2d(I, rotate_R, 'valid')
+      # correlates using scipy - fastest option available iff the self.ch and
+      # self.cw are high (>30). In this case, the number of components
+      # returned by the convolution is high and using an FFT-based method
+      # yields best results. Otherwise, you may try the other options below
+      # -> check our test_correlation() method on the test units for more
+      # details and benchmarks.
+      Nm = scipy.signal.fftconvolve(I, numpy.rot90(crop_R, k=2), 'valid')
+      # 2nd best: use convolve2d directly;
+      # Nm = scipy.signal.convolve2d(I, numpy.rot90(crop_R, k=2), 'valid')
+      # 3rd best: use correlate2d
+      # Nm = scipy.signal.correlate2d(I, crop_R, 'valid')
 
       # figures out where the maximum is on the resulting matrix
       t0, s0 = numpy.unravel_index(Nm.argmax(), Nm.shape)
diff --git a/bob/bio/vein/tests/test.py b/bob/bio/vein/tests/test.py
index b2c9530faabe555cd9b8f007e68b340e687e1928..ab9b76f9c6c3adddcba10568226bf8a4bd242766 100644
--- a/bob/bio/vein/tests/test.py
+++ b/bob/bio/vein/tests/test.py
@@ -24,7 +24,6 @@ import bob.io.matlab
 import bob.io.image
 
 from ..preprocessor import utils as preprocessor_utils
-from ..algorithm import utils as algorithm_utils
 
 
 def F(parts):
@@ -332,56 +331,150 @@ def test_intersection_ratio():
   nose.tools.eq_(preprocessor_utils.intersect_ratio_of_complement(b, numpy.zeros(b.shape, dtype=bool)), 0)
 
 
-def test_convolution():
+def test_correlation():
 
-  # A test for convolution performance. Convolutions are used on the Miura
  # A test for correlation performance. Correlations are used on the Miura
   # Match algorithm, therefore we want to make sure we can perform them as fast
   # as possible.
+  import numpy
   import scipy.signal
+  import bob.sp
 
+  # Rough example from Vera fingervein database
   Y = 250
   X = 600
-  CH = 1
-  CW = 1
+  CH = 80
+  CW = 90
 
   def gen_ab():
     a = numpy.random.randint(256, size=(Y, X)).astype(float)
     b = numpy.random.randint(256, size=(Y-CH, X-CW)).astype(float)
     return a, b
 
-  def utils_function(a, b):
-    return algorithm_utils.convfft(a, b)
+
+  def bob_function(a, b):
+
+    # rotate input image by 180 degrees
+    b = numpy.rot90(b, k=2)
+
+    # Determine padding size in x and y dimension
+    size_a  = numpy.array(a.shape)
+    size_b  = numpy.array(b.shape)
+    outsize = size_a + size_b - 1
+
+    # Determine 2D cross correlation in Fourier domain
+    a2 = numpy.zeros(outsize)
+    a2[0:size_a[0],0:size_a[1]] = a
+    Fa = bob.sp.fft(a2.astype(numpy.complex128))
+
+    b2 = numpy.zeros(outsize)
+    b2[0:size_b[0],0:size_b[1]] = b
+    Fb = bob.sp.fft(b2.astype(numpy.complex128))
+
+    conv_ab = numpy.real(bob.sp.ifft(Fa*Fb))
+
+    h, w = size_a - size_b + 1
+
+    Nm = conv_ab[size_b[0]-1:size_b[0]-1+h, size_b[1]-1:size_b[1]-1+w]
+
+    t0, s0 = numpy.unravel_index(Nm.argmax(), Nm.shape)
+
+    # this is our output
+    Nmm = Nm[t0,s0]
+
+    # normalizes the output by the number of pixels lit on the input
+    # matrices, taking into consideration the surface that produced the
+    # result (i.e., the eroded model and part of the probe)
+    h, w = b.shape
+    return Nmm/(sum(sum(b)) + sum(sum(a[t0:t0+h-2*CH, s0:s0+w-2*CW])))
+
 
   def scipy_function(a, b):
-    return scipy.signal.convolve2d(a, b, 'valid')
+    b = numpy.rot90(b, k=2)
+
+    Nm = scipy.signal.convolve2d(a, b, 'valid')
+
+    # figures out where the maximum is on the resulting matrix
+    t0, s0 = numpy.unravel_index(Nm.argmax(), Nm.shape)
+
+    # this is our output
+    Nmm = Nm[t0,s0]
+
+    # normalizes the output by the number of pixels lit on the input
+    # matrices, taking into consideration the surface that produced the
+    # result (i.e., the eroded model and part of the probe)
+    h, w = b.shape
+    return Nmm/(sum(sum(b)) + sum(sum(a[t0:t0+h-2*CH, s0:s0+w-2*CW])))
+
 
   def scipy2_function(a, b):
-    return scipy.signal.fftconvolve(a, b, 'valid')
+    b = numpy.rot90(b, k=2)
+    Nm = scipy.signal.fftconvolve(a, b, 'valid')
+
+    # figures out where the maximum is on the resulting matrix
+    t0, s0 = numpy.unravel_index(Nm.argmax(), Nm.shape)
+
+    # this is our output
+    Nmm = Nm[t0,s0]
+
+    # normalizes the output by the number of pixels lit on the input
+    # matrices, taking into consideration the surface that produced the
+    # result (i.e., the eroded model and part of the probe)
+    h, w = b.shape
+    return Nmm/(sum(sum(b)) + sum(sum(a[t0:t0+h-2*CH, s0:s0+w-2*CW])))
+
+
+  def scipy3_function(a, b):
+    Nm = scipy.signal.correlate2d(a, b, 'valid')
+
+    # figures out where the maximum is on the resulting matrix
+    t0, s0 = numpy.unravel_index(Nm.argmax(), Nm.shape)
+
+    # this is our output
+    Nmm = Nm[t0,s0]
+
+    # normalizes the output by the number of pixels lit on the input
+    # matrices, taking into consideration the surface that produced the
+    # result (i.e., the eroded model and part of the probe)
+    h, w = b.shape
+    return Nmm/(sum(sum(b)) + sum(sum(a[t0:t0+h-2*CH, s0:s0+w-2*CW])))
 
   a, b = gen_ab()
-  assert numpy.allclose(utils_function(a, b), scipy_function(a, b))
+
+  assert numpy.allclose(bob_function(a, b), scipy_function(a, b))
   assert numpy.allclose(scipy_function(a, b), scipy2_function(a, b))
+  assert numpy.allclose(scipy2_function(a, b), scipy3_function(a, b))
 
+  # if you want to test timings, uncomment the following section
+  '''
   import time
 
   start = time.clock()
   N = 10
   for i in range(N):
     a, b = gen_ab()
-    utils_function(a, b)
+    bob_function(a, b)
   total = time.clock() - start
-  print('utils, %d iterations - %.2e per iteration' % (N, total/N))
+  print('bob implementation, %d iterations - %.2e per iteration' % (N, total/N))
 
   start = time.clock()
   for i in range(N):
     a, b = gen_ab()
     scipy_function(a, b)
   total = time.clock() - start
-  print('scipy, %d iterations - %.2e per iteration' % (N, total/N))
+  print('scipy+convolve, %d iterations - %.2e per iteration' % (N, total/N))
 
   start = time.clock()
   for i in range(N):
     a, b = gen_ab()
     scipy2_function(a, b)
   total = time.clock() - start
-  print('scipy2, %d iterations - %.2e per iteration' % (N, total/N))
+  print('scipy+fftconvolve, %d iterations - %.2e per iteration' % (N, total/N))
+
+  start = time.clock()
+  for i in range(N):
+    a, b = gen_ab()
+    scipy3_function(a, b)
+  total = time.clock() - start
+  print('scipy+correlate2d, %d iterations - %.2e per iteration' % (N, total/N))
+  '''