Commit fb45722e authored by Manuel Günther

Added function to convert Bob's 4col and 5col score files to OpenBR matrices

parent 58802bc9
......@@ -9,6 +9,7 @@ from .version import module as __version__
from . import plot
from . import load
from . import calibration
from . import openbr
import numpy
def mse (estimation, target):
......
S2
unknown-gallery.lst
unknown-probe.lst
MB 100 20 xV4

\ No newline at end of file
"""This file includes functionality to convert between Bob's four column or five column score files and the Matrix files used in OpenBR."""
import numpy
import logging
logger = logging.getLogger("bob.measure")
from .load import open_file, four_column, five_column
def write_matrix(
    score_file,
    matrix_file,
    mask_file,
    model_names = None,
    probe_names = None,
    score_file_format = '4column',
    gallery_file_name = 'unknown-gallery.lst',
    probe_file_name = 'unknown-probe.lst'
):
    """Writes the OpenBR matrix and mask files (version 2), given the score file.

    If gallery and probe names are provided, the matrices in both files will be sorted by gallery and probe names.
    Names that are not provided are collected from the score file in order of first appearance.

    The similarity matrix has one row per probe and one column per model and is stored as ``float32``;
    elements for which the score file contains no score stay ``NaN``.
    The mask has the same shape and is stored as ``uint8``: ``0xff`` where the claimed client id equals the real id,
    ``0x7f`` where it differs, and ``0`` where no score was found.

    .. warning::
       When provided with a 4-column score file, this function will work only, if there is only a single model id for each client.

    Keyword parameters:

    score_file : str
      The 4 or 5 column style score file written by bob.

    matrix_file : str
      The OpenBR matrix file that should be written.
      Usually, the file name extension is .mtx

    mask_file : str
      The OpenBR mask file that should be written.
      The mask file defines, which values are positives, negatives or to be ignored.

    model_names : [str] or ``None``
      If given, the matrix will be written in the same order as the given model names.
      The model names must be identical with the second column in the 5-column ``score_file``.

      .. note::
         If the score file is in four column format, the model_names must be the client ids stored in the first column.
         In this case, there might be only a single model per client

      Only the scores of the given models will be considered; lines of other models are skipped.

    probe_names : [str] or ``None``
      If given, the matrix will be written in the same order as the given probe names (the path of the probe).
      The probe names are identical to the probe column of the ``score_file`` (third column in 4-column, fourth column in 5-column format).
      Only the scores of the given probe names will be considered in this case; lines of other probes are skipped.

    score_file_format : one of ``('4column', '5column')``
      The format of the ``score_file``; any other value raises a ``KeyError``.

    gallery_file_name : str
      The name of the gallery file that will be written in the header of the OpenBR files.

    probe_file_name : str
      The name of the probe file that will be written in the header of the OpenBR files.
    """

    def _write_matrix(filename, matrix):
        ## Helper function to write a matrix file as required by OpenBR
        type_code = 'B' if matrix.dtype == numpy.uint8 else 'F'
        header = "S2\n%s\n%s\nM%s %d %d " % (gallery_file_name, probe_file_name, type_code, matrix.shape[0], matrix.shape[1])
        # the file is opened in binary mode, so text has to be converted to bytes first
        # (on Python 2, ``str`` already is ``bytes`` and is written unchanged)
        if not isinstance(header, bytes):
            header = header.encode('utf-8')
        with open(filename, 'wb') as f:
            # write the first four lines
            f.write(header)
            # write magic number (stored in the byte order of this machine)
            numpy.array(0x12345678, numpy.int32).tofile(f)
            f.write(b"\n")
            # write the matrix
            matrix.tofile(f)

    # define read functions, and which information should be read
    read_function = {'4column' : four_column, '5column' : five_column}[score_file_format]
    # the 5-column format has an additional model label column after the claimed id
    offset = {'4column' : 0, '5column' : 1}[score_file_format]

    # first, read the score file and estimate model ids and probe names, if not given;
    # a list that was given by the caller is kept untouched and restricts the estimation of the other one
    estimate_models = model_names is None
    estimate_probes = probe_names is None
    if estimate_models or estimate_probes:
        allowed_models = None if estimate_models else set(model_names)
        allowed_probes = None if estimate_probes else set(probe_names)
        found_models, found_probes = [], []
        seen_models, seen_probes = set(), set()

        # read the score file
        for line in read_function(score_file):
            model, probe = line[offset], line[2+offset]
            if allowed_models is not None and model not in allowed_models:
                continue
            if allowed_probes is not None and probe not in allowed_probes:
                continue
            if estimate_models and model not in seen_models:
                found_models.append(model)
                seen_models.add(model)
            if estimate_probes and probe not in seen_probes:
                found_probes.append(probe)
                seen_probes.add(probe)

        if estimate_models:
            model_names = found_models
        if estimate_probes:
            probe_names = found_probes

    # create a shortcut to get indices for client and probe subset (to increase speed)
    model_dict = {m:i for i,m in enumerate(model_names)}
    probe_dict = {p:i for i,p in enumerate(probe_names)}

    # now, create the matrices in the desired size; NaN marks elements without a score
    matrix = numpy.full((len(probe_names), len(model_names)), numpy.nan, dtype=numpy.float32)
    mask = numpy.zeros(matrix.shape, numpy.uint8)

    # now, iterate through the score file and fill in the matrix
    for line in read_function(score_file):
        client, model, real_id, probe, score = line[0], line[offset], line[1+offset], line[2+offset], line[3+offset]

        # only the requested models and probes are considered
        if model not in model_dict or probe not in probe_dict:
            continue

        model_index = model_dict[model]
        probe_index = probe_dict[probe]

        # check, if we have already written something into that matrix element
        if mask[probe_index, model_index]:
            logger.warning("Overwriting existing matrix '%f' element of client '%s' and probe '%s' with '%f'", matrix[probe_index, model_index], client, probe, score)

        matrix[probe_index, model_index] = score
        mask[probe_index, model_index] = 0xff if client == real_id else 0x7f

    # OK, now finally write the file in the desired format
    _write_matrix(mask_file, mask)
    _write_matrix(matrix_file, matrix)
......@@ -8,7 +8,9 @@
"""Tests the IO functionality of bob.measure."""
import bob.measure
import pkg_resources
import tempfile, os, shutil
import bob.io.base.test_utils
def test_load_scores():
# This function tests the IO functionality of loading score files in different ways
......@@ -18,18 +20,62 @@ def test_load_scores():
cols = {'4col' : 4, '5col' : 5}
for variant in ('4col', '5col'):
# read score file in normal way
normal_score_file = pkg_resources.resource_filename('bob.measure', 'data/dev-%s.txt' % variant)
normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure')
normal_scores = list(load_functions[variant](normal_score_file))
assert len(normal_scores) == 910
assert all(len(s) == cols[variant] for s in normal_scores)
# read the compressed score file
compressed_score_file = pkg_resources.resource_filename('bob.measure', 'data/dev-%s.tar.gz' % variant)
compressed_score_file = bob.io.base.test_utils.datafile('dev-%s.tar.gz' % variant, 'bob.measure')
compressed_scores = list(load_functions[variant](compressed_score_file))
assert len(compressed_scores) == len(normal_scores)
assert all(len(c) == cols[variant] for c in compressed_scores)
assert all(c[i] == s[i] for c,s in zip(compressed_scores, normal_scores) for i in range(cols[variant]))
def _check_binary_identical(name1, name2):
    """Asserts that the two given files have identical binary content.

    Both files are read completely in binary mode and their MD5 digests are compared.

    name1, name2 : str
      The names of the two files to compare.

    Raises an ``AssertionError`` if the digests differ.
    """
    # see: http://www.peterbe.com/plog/using-md5-to-check-equality-between-files
    # hashlib replaces the ``md5`` module, which exists in Python 2 only
    import hashlib
    # open in binary mode so that no decoding or newline translation is applied
    with open(name1, 'rb') as f1, open(name2, 'rb') as f2:
        assert hashlib.md5(f1.read()).digest() == hashlib.md5(f2.read()).digest()
def test_convert_openbr():
    """Tests that the conversion of score files to the OpenBR matrix and mask files works as expected.

    For both the 4-column and the 5-column score file, the conversion is run twice:
    once letting ``write_matrix`` collect the model and probe names from the score file,
    and once with explicitly given names. Each time, the written files must be binary
    identical to the reference files.
    """
    temp_dir = tempfile.mkdtemp(prefix='bob_test')

    # define output files; the extension is formatted before joining, so that the directory name is never used as a format string
    openbr_extensions = ('.mtx', '.mask')
    matrix_file, mask_file = [os.path.join(temp_dir, "scores%s" % ext) for ext in openbr_extensions]

    try:
        # reference files (which are tested to work and give the same results with OpenBR)
        matrix_ref, mask_ref = [bob.io.base.test_utils.datafile('scores%s' % ext, 'bob.measure') for ext in openbr_extensions]

        # these ids are identical to what is found in the score file, which in turn comes from the AT&T database
        dev_ids = (3,4,7,8,9,13,15,18,19,22,23,25,28,30,31,32,35,37,38,40)

        for variant in ('4col', '5col'):
            # get score file
            score_file = bob.io.base.test_utils.datafile('scores-cmc-%s.txt' % variant, 'bob.measure')

            # model names differ between the formats: client ids for 4-column, model labels for 5-column
            model_type = {"4col" : "%d", "5col" : "s%d"}[variant]
            explicit_names = {
                'model_names' : [model_type % c for c in dev_ids],
                'probe_names' : ["s%d/%d" % (c, sample) for c in dev_ids for sample in (1,3,6,8,10)],
            }

            # first round: no keyword arguments -- let the function get the gallery and probe ids automatically
            # second round: define model and probe names explicitly
            for kwargs in ({}, explicit_names):
                bob.measure.openbr.write_matrix(score_file, matrix_file, mask_file, score_file_format = "%sumn" % variant, **kwargs)

                assert os.path.isfile(matrix_file) and os.path.isfile(mask_file)

                # check that they are binary identical to the reference files
                _check_binary_identical(matrix_file, matrix_ref)
                _check_binary_identical(mask_file, mask_ref)

    finally:
        shutil.rmtree(temp_dir)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment