Commit 1ea1fb23 authored by Theophile GENTILHOMME

Add bob.measure.cli entry and implement bob measure evaluate using code of compute_perf.py

parent 3b270041
Pipeline #17787 passed with stage
in 20 minutes and 45 seconds
......@@ -41,6 +41,6 @@ def split(filename):
the first column containing -1 or 1 (i.e. negative or
positive) and the second the scores
(float).'''.format(filename))
return None, None
return (None, None)
return (scores[numpy.where(neg_pos == -1)],
scores[numpy.where(neg_pos == 1)])
import numpy
def remove_nan(scores):
    """Removes the NaNs from the scores.

    Parameters
    ----------
    scores : numpy.ndarray
        Array of scores, possibly containing NaNs.

    Returns
    -------
    tuple
        ``(clean_scores, sum_nans, total)``: the entries of ``scores`` that
        are not NaN, the number of NaNs that were found and ``len(scores)``.
    """
    nans = numpy.isnan(scores)
    # count_nonzero counts in native code; the builtin ``sum`` would walk the
    # array one element at a time in the interpreter.
    sum_nans = numpy.count_nonzero(nans)
    total = len(scores)
    if sum_nans > 0:
        logger.warning('Found %d NaNs in %d scores', sum_nans, total)
    return scores[~nans], sum_nans, total
def get_fta(scores):
    """Calculates the Failure To Acquire (FtA) rate.

    NaN entries are removed from both score sets with ``remove_nan``; the FtA
    rate is the number of removed NaNs divided by the total number of scores.

    Parameters
    ----------
    scores : sequence
        Two score arrays; ``scores[0]`` is returned as ``neg`` and
        ``scores[1]`` as ``pos`` once their NaNs are removed.

    Returns
    -------
    tuple
        ``((neg, pos), fta)`` with the NaN-free score arrays and the FtA rate
        as a float. The rate is ``0.0`` when there are no scores at all.
    """
    neg, neg_nans, neg_total = remove_nan(scores[0])
    pos, pos_nans, pos_total = remove_nan(scores[1])
    fta_sum = neg_nans + pos_nans
    fta_total = neg_total + pos_total
    # Two empty score sets would otherwise divide by zero. The explicit float
    # also keeps this a true division regardless of the module's __future__
    # imports.
    fta = float(fta_sum) / fta_total if fta_total else 0.0
    return ((neg, pos), fta)
'''Stores click common options for plots'''
import logging
import click
from click.types import INT, FLOAT, Choice, File
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from bob.extension.scripts.click_helper import verbosity_option
# Module-level logger; normalize_options reports the parsed subplot layout and
# layout problems through it.
logger = logging.getLogger(__name__)
def plot_options(f):
    """Decorates a click command with the options shared by plotting commands.

    The context is passed to ``f`` (``click.pass_context``), the verbosity
    option is added, followed by ``--style``, ``--titles``, ``--top``,
    ``--legend-ncol``, ``--figsize``, ``--y1-label``, ``--x-label``,
    ``--subplot`` and ``-o/--output``.

    Parameters
    ----------
    f : callable
        The command function to decorate.

    Returns
    -------
    callable
        ``f`` wrapped with all the decorators listed above.
    """
    # Decorators are applied from top to bottom, so more important options go
    # down the list here (click lists later-applied options first in --help).
    decorators = [
        click.pass_context,
        verbosity_option(),
        click.option(
            '--style', multiple=True,
            type=click.types.Choice(plt.style.available),
            help='The matplotlib style to use for plotting. You can provide '
            'multiple styles by repeating this option'),
        click.option(
            '--titles', help='The title for each system comma separated. '
            'Example: --titles ISV,CNN'),
        click.option(
            '--top', type=FLOAT,
            help='To give to ``plt.subplots_adjust(top=top)``. If given, '
            'first plt.tight_layout is called. If you want tight_layout to '
            'be called, then you need to provide this option.'),
        click.option(
            '--legend-ncol', default=3, show_default=True,
            type=INT,
            help='The number of columns of the legend layout.'),
        # The example used to advertise a non-existent ``--fig-size`` option.
        click.option(
            '--figsize', help='If given, will run '
            '``plt.figure(figsize=figsize)``. Example: --figsize 4,6'),
        # click.option(
        #     '--y2-label',
        #     help='The id of figures which should have y2_label separated by '
        #     'comma. For example ``--y2-label 1,2,4``.'),
        click.option(
            '--y1-label',
            help='The id of figures which should have y1_label separated by '
            'comma. For example ``--y1-label 1,2,4``.'),
        click.option(
            '--x-label',
            help='The id of figures which should have x_label separated by '
            'comma. For example ``--x-label 1,2,4``.'),
        click.option(
            '--subplot', type=INT, default=111,
            show_default=True, help='The order of subplots.'),
        click.option(
            '-o', '--output', type=File(mode='wb'),
            default='plots.pdf', show_default=True,
            help='The file to save the plots in.'),
    ]
    for decorate in decorators:
        f = decorate(f)
    return f
def normalize_options(ctx, n_systems, output, subplot, style, x_label,
                      y1_label, figsize, legend_ncol, top, titles,
                      y2_label=None):
    """Parses the raw plot options and stores the results in ``ctx.meta``.

    Besides filling ``ctx.meta`` this applies the requested matplotlib
    ``style``, creates a ``PdfPages`` object on ``output`` and opens a new
    figure with the requested ``figsize``.

    Parameters
    ----------
    ctx : object
        Object with a ``meta`` dictionary (the click context) to fill in.
    n_systems : int
        Number of systems that will be plotted.
    output : object
        Destination handed to ``PdfPages``.
    subplot : int
        Subplot code such as ``111``. The last digit is discarded and the
        two remaining digits are read as number of rows and of columns.
    style : sequence or None
        Matplotlib style(s), given to ``plt.style.use`` when not empty.
    x_label, y1_label, y2_label : str or None
        Comma separated figure ids that get the corresponding axis label.
        When missing, they default to the last row (``x_label``), the first
        column (``y1_label``) and the last column (``y2_label``).
    figsize : str or None
        Comma separated figure size, converted to a list of floats.
    legend_ncol, top : object
        Stored in ``ctx.meta`` unchanged.
    titles : str or None
        Comma separated titles; defaults to ``None`` for every system.

    Returns
    -------
    object
        The same ``ctx`` that was given.
    """
    def parse_list(value, convert):
        # ``None`` means the option was not given and is passed through
        if value is None:
            return None
        return [convert(item) for item in value.split(',')]

    if style:
        plt.style.use(style)

    meta = ctx.meta
    meta['output'] = output
    meta['PdfPages'] = PdfPages(output)
    meta['x_label'] = parse_list(x_label, int)
    meta['y1_label'] = parse_list(y1_label, int)
    meta['y2_label'] = parse_list(y2_label, int)
    meta['subplot'] = subplot

    # drop the last digit of the subplot code, then split rows from columns
    nrows, ncols = divmod(subplot // 10, 10)
    logger.debug('Got %d, %d for nrows and ncols', nrows, ncols)

    meta['figsize'] = parse_list(figsize, float)
    plt.figure(figsize=meta['figsize'])
    meta['legend_ncol'] = legend_ncol
    meta['top'] = top

    if nrows * ncols < n_systems:
        logger.error("The number of subplots is smaller than the number of "
                     "systems. I will plot one system a column. Use --subplot "
                     "to remove this error.")
        nrows, ncols = 1, n_systems
    # Stored only after the fallback above so that 'nrows_ncols' always
    # agrees with 'nrows' and 'ncols'.
    meta['nrows_ncols'] = nrows, ncols
    meta['nrows'], meta['ncols'] = nrows, ncols

    meta['titles'] = parse_list(titles, str) or [None] * n_systems

    # Try to automatically figure out where to place labels
    # x_label should be True if row == -1
    # y1_label should be True if col == 0
    # y2_label should be True if col == -1
    figure_ids = range(1, n_systems + 1)
    meta['x_label'] = meta['x_label'] or \
        [x for x in figure_ids if ((x - 1) // ncols) == (nrows - 1)]
    meta['y1_label'] = meta['y1_label'] or \
        [x for x in figure_ids if ((x - 1) % ncols) == 0]
    meta['y2_label'] = meta['y2_label'] or \
        [x for x in figure_ids if ((x - 1) % ncols) == (ncols - 1)]
    return ctx
'''Runs error analysis on score sets, outputs metrics and plots'''
from __future__ import division, print_function
import sys
import numpy
import click
from click.types import INT, FLOAT, Choice, File
import bob.core
from .common_options import (plot_options, normalize_options)
from .. import load
# Stream the reports are printed to; ``evaluate`` rebinds this global to a
# file when its ``--log`` option is given.
LOG_FILE = sys.stdout
# Logger used by this module for warnings and errors.
logger = bob.core.log.setup("bob.measure")
def remove_nan(scores):
    """Removes the NaNs from the scores.

    Parameters
    ----------
    scores : numpy.ndarray
        Array of scores, possibly containing NaNs.

    Returns
    -------
    tuple
        ``(clean_scores, sum_nans, total)``: the entries of ``scores`` that
        are not NaN, the number of NaNs that were found and ``len(scores)``.
    """
    nans = numpy.isnan(scores)
    # count_nonzero counts in native code; the builtin ``sum`` would walk the
    # array one element at a time in the interpreter.
    sum_nans = numpy.count_nonzero(nans)
    total = len(scores)
    if sum_nans > 0:
        logger.warning('Found %d NaNs in %d scores', sum_nans, total)
    return scores[~nans], sum_nans, total
def get_fta(scores):
    """Calculates the Failure To Acquire (FtA) rate.

    NaN entries are removed from both score sets with ``remove_nan``; the FtA
    rate is the number of removed NaNs divided by the total number of scores.

    Parameters
    ----------
    scores : sequence
        Two score arrays; ``scores[0]`` is returned as ``neg`` and
        ``scores[1]`` as ``pos`` once their NaNs are removed.

    Returns
    -------
    tuple
        ``((neg, pos), fta)`` with the NaN-free score arrays and the FtA rate
        as a float. The rate is ``0.0`` when there are no scores at all.
    """
    neg, neg_nans, neg_total = remove_nan(scores[0])
    pos, pos_nans, pos_total = remove_nan(scores[1])
    fta_sum = neg_nans + pos_nans
    fta_total = neg_total + pos_total
    # Two empty score sets would otherwise divide by zero; report no failure
    # instead of crashing.
    fta = float(fta_sum) / fta_total if fta_total else 0.0
    return ((neg, pos), fta)
def print_crit(crit, dev_scores, dev_fta, test_scores=None, test_fta=None):
    """Prints the error-rate table of one threshold criterion to ``LOG_FILE``.

    The threshold is computed on the development scores, with
    ``eer_threshold`` when ``crit`` is ``'EER'`` and ``min_hter_threshold``
    otherwise. When ``test_scores`` is given, the same threshold is applied
    to the test scores and a second column is printed.

    Parameters
    ----------
    crit : str
        Name of the criterion; also printed in the threshold line.
    dev_scores : tuple
        ``(negatives, positives)`` arrays of the development set.
    dev_fta : float
        Failure to acquire rate of the development set.
    test_scores : tuple or None
        ``(negatives, positives)`` arrays of the test set, if any.
    test_fta : float or None
        Failure to acquire rate of the test set.
    """
    if crit == 'EER':
        from .. import eer_threshold as pick_threshold
    else:
        from .. import min_hter_threshold as pick_threshold
    from .. import farfrr

    def rates(negatives, positives, fta):
        # FMR/FNMR at the threshold, then FAR/FRR/HTER corrected by the FTA
        fmr, fnmr = farfrr(negatives, positives, thres)
        far = fmr * (1 - fta)
        frr = fta + fnmr * (1 - fta)
        return fmr, fnmr, far, frr, (far + frr) / 2.0

    def cells(negatives, positives, stats):
        # formatted table cells (FMR, FNMR, FAR, FRR, HTER) and column width
        fmr, fnmr, far, frr, hter = stats
        n_impostors = negatives.shape[0]
        n_clients = positives.shape[0]
        counted = [
            "%.3f%% (%d/%d)" % (100 * fmr, int(round(fmr * n_impostors)),
                                n_impostors),
            "%.3f%% (%d/%d)" % (100 * fnmr, int(round(fnmr * n_clients)),
                                n_clients),
        ]
        plain = ["%.3f%%" % (100 * value) for value in (far, frr, hter)]
        # the column is as wide as the longer of the FMR and FNMR cells
        return counted + plain, max(len(text) for text in counted)

    dev_neg, dev_pos = dev_scores
    thres = pick_threshold(dev_neg, dev_pos)
    dev_stats = rates(dev_neg, dev_pos, dev_fta)
    print("[Min. criterion: %s] Threshold on Development set: %e" % (crit, thres), file=LOG_FILE)
    dev_cells, dev_width = cells(dev_neg, dev_pos, dev_stats)

    # one entry per printed column: title, cells, cell width, rule length
    columns = [("Development", dev_cells, dev_width, dev_width)]
    if test_scores is not None:
        # statistics for the test set are based on the threshold a priori
        test_neg, test_pos = test_scores
        test_stats = rates(test_neg, test_pos, test_fta)
        test_cells, test_width = cells(test_neg, test_pos, test_stats)
        columns.append(("Test", test_cells, test_width, 2 + test_width))

    lines = [
        " | " + " | ".join(col[0].ljust(col[2]) for col in columns),
        "-------+-" + "-+-".join("-" * col[3] for col in columns),
    ]
    row_names = (" FMR | ", " FNMR | ", " FAR | ", " FRR | ", " HTER | ")
    for index, row_name in enumerate(row_names):
        lines.append(
            row_name + " | ".join(col[1][index].ljust(col[2])
                                  for col in columns))

    for line in lines:
        print(line, file=LOG_FILE)
def plots(crit, points, filename, dev_scores, test_scores=None):
    """Saves ROC, DET and EPC curves on the file pointed out by filename.

    The PDF gets one page each for the ROC curve, the DET curve, the EPC curve
    (only when ``test_scores`` is given) and the score distributions with the
    decision threshold drawn on top.

    Parameters
    ----------
    crit : str
        Threshold criterion: ``'EER'`` uses ``eer_threshold``, anything else
        uses ``min_hter_threshold``. Also used as the legend label of the
        threshold line.
    points : int
        Number of points, passed on to the ``plot`` functions.
    filename : str
        Destination handed to ``PdfPages``.
    dev_scores : tuple
        ``(negatives, positives)`` arrays of the development set.
    test_scores : tuple or None
        ``(negatives, positives)`` arrays of the test set, if any.
    """
    dev_neg, dev_pos = dev_scores
    has_test = test_scores is not None
    test_neg, test_pos = test_scores if has_test else (None, None)

    from .. import plot
    import matplotlib
    if not hasattr(matplotlib, 'backends'):
        matplotlib.use('pdf')
    import matplotlib.pyplot as mpl
    from matplotlib.backends.backend_pdf import PdfPages

    grey, black = (0.3, 0.3, 0.3), (0, 0, 0)
    nbins = 20

    def draw_curves(draw):
        # dev curve alone, or dashed dev curve plus solid test curve
        if has_test:
            draw(dev_neg, dev_pos, points, color=grey,
                 linestyle='--', dashes=(6, 2), label='development')
            draw(test_neg, test_pos, points, color=black,
                 linestyle='-', label='test')
        else:
            draw(dev_neg, dev_pos, points, color=black,
                 linestyle='-', label='development')

    def finish_curve_page(fig, title):
        # decorations shared by the ROC and DET pages
        mpl.title(title)
        mpl.xlabel('FMR (%)')
        mpl.ylabel('FNMR (%)')
        mpl.grid(True, color=grey)
        if has_test:
            mpl.legend()
        pp.savefig(fig)

    def histograms(neg, pos):
        # NOTE(review): ``normed`` was replaced by ``density`` in newer
        # matplotlib releases; confirm which versions must be supported.
        mpl.hist(neg, label='Impostors', normed=True, color='red', alpha=0.5,
                 bins=nbins)
        mpl.hist(pos, label='Genuine', normed=True, color='blue', alpha=0.5,
                 bins=nbins)

    def threshold_line():
        # mpl.axis() returns (xmin, xmax, ymin, ymax)
        _, _, ymin, ymax = mpl.axis()
        # labelled with the criterion actually used, not always 'EER'
        mpl.vlines(thres, ymin, ymax, color='black', label=crit,
                   linestyle='dashed')

    pp = PdfPages(filename)
    try:
        # ROC
        fig = mpl.figure()
        draw_curves(plot.roc)
        mpl.axis([0, 40, 0, 40])
        finish_curve_page(fig, "ROC Curve")

        # DET
        fig = mpl.figure()
        draw_curves(plot.det)
        plot.det_axis([0.01, 40, 0.01, 40])
        finish_curve_page(fig, "DET Curve")

        # EPC - requires test set
        if has_test:
            fig = mpl.figure()
            plot.epc(dev_neg, dev_pos, test_neg, test_pos, points,
                     color=black, linestyle='-')
            mpl.title('EPC Curve')
            mpl.xlabel('Cost')
            mpl.ylabel('Min. HTER (%)')
            mpl.grid(True, color=grey)
            pp.savefig(fig)

        # Distribution for dev and test scores on the same page
        if crit == 'EER':
            from .. import eer_threshold
            thres = eer_threshold(dev_neg, dev_pos)
        else:
            from .. import min_hter_threshold
            thres = min_hter_threshold(dev_neg, dev_pos)

        fig = mpl.figure()
        if has_test:
            mpl.subplot(2, 1, 1)
            all_scores = numpy.hstack((dev_neg, test_neg, dev_pos, test_pos))
        else:
            all_scores = numpy.hstack((dev_neg, dev_pos))
        # both panels share the same x range
        score_range = all_scores.min(), all_scores.max()

        histograms(dev_neg, dev_pos)
        mpl.xlim(*score_range)
        threshold_line()
        if has_test:
            ax = mpl.gca()
            ax.axes.get_xaxis().set_ticklabels([])
            mpl.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, -0.01),
                       fontsize=10)
            mpl.ylabel('Dev. Scores (normalized)')
        else:
            mpl.ylabel('Normalized Count')
            mpl.legend(loc='best', fancybox=True, framealpha=0.5)
        mpl.title('Score Distributions')
        mpl.grid(True, alpha=0.5)

        if has_test:
            mpl.subplot(2, 1, 2)
            histograms(test_neg, test_pos)
            mpl.ylabel('Test Scores (normalized)')
            mpl.xlabel('Score value')
            mpl.xlim(*score_range)
            threshold_line()
            mpl.grid(True, alpha=0.5)
        pp.savefig(fig)
    finally:
        # close the PDF even when one of the plotting calls fails
        pp.close()
@click.command()
@click.option('-l', '--log', help='If provided, computed numbers are written to '
              'this file instead of the standard output.')
@click.option('-x', '--no-plot', default=False, show_default=True,
              help='If True, then I\'ll execute no plotting')
@click.option('-n', '--points', type=INT, default=100, show_default=True,
              help='Number of points to use in the curves')
@click.option('-o', '--output', default='curves.pdf', show_default=True,
              help='The file to save the plots in.')
@click.argument('scores', nargs=-1)
def evaluate(scores, output, points, no_plot, log):
    '''Runs error analysis on score sets

    1. Computes the threshold using either EER or min. HTER criteria on
    development set scores

    2. Applies the above threshold on test set scores to compute the HTER, if a
    test-score set is provided

    3. Reports error rates on the console

    4. Plots ROC, EPC, DET curves and score distributions to a multi-page PDF
    file (unless --no-plot is passed)

    You need to provide 2 score files for each biometric system in this order:

    \b
    * development scores
    * evaluation scores

    Examples:

        $ bob measure evaluate dev-scores test-scores
    '''
    if not scores:
        logger.error("No score argument(s).")
        return -1

    # Validated with an exception rather than ``assert`` so the check also
    # runs under ``python -O``, and before the log file is opened.
    if points <= 0:
        raise click.BadParameter("Number of points must be positive",
                                 param_hint='--points')

    # setup the logfile; print_crit writes to the module-level LOG_FILE
    global LOG_FILE
    previous_log_file = LOG_FILE
    if log is not None:
        LOG_FILE = open(log, 'w')

    try:
        dev_scores = load.split(scores[0])
        if len(scores) > 1:
            test_scores = load.split(scores[1])
        else:
            test_scores = None
        test_fta = None

        # test if there are nan in the score files and remove them
        # also calculate FTA
        dev_scores, dev_fta = get_fta(dev_scores)
        print("Failure To Acquire (FTA) in the development set is: {:.3f}%".format(
            dev_fta * 100), file=LOG_FILE)
        if test_scores is not None:
            test_scores, test_fta = get_fta(test_scores)
            print("Failure To Acquire (FTA) in the test set is: {:.3f}%".format(
                test_fta * 100), file=LOG_FILE)

        print_crit('EER', dev_scores, dev_fta, test_scores, test_fta)
        print_crit('Min. HTER', dev_scores, dev_fta, test_scores, test_fta)

        if not no_plot:
            plots('EER', points, output, dev_scores, test_scores)
            print("[Plots] Performance curves => '%s'" % output,
                  file=LOG_FILE)
        LOG_FILE.flush()
    finally:
        # Close the log file this call opened and restore the previous stream
        # so LOG_FILE never points at a closed file.
        if log is not None:
            LOG_FILE.close()
            LOG_FILE = previous_log_file
    return 0
"""The main entry for bob.measure (click-based) scripts.
"""
import click
import pkg_resources
from click_plugins import with_plugins
@with_plugins(pkg_resources.iter_entry_points('bob.measure.cli'))
@click.group()
def measure():
    """Entry for bob.measure commands."""
    # Nothing to do here: the group only hosts the commands registered under
    # the 'bob.measure.cli' entry points handed to ``with_plugins`` above.
......@@ -22,7 +22,7 @@ setup(
name='bob.measure',
version=version,
description='Evalution metrics for Bob',
description='Evaluation metrics for Bob',
url='http://gitlab.idiap.ch/bob/bob.measure',
license='BSD',
author='Andre Anjos',
......@@ -65,11 +65,14 @@ setup(
},
entry_points={
'console_scripts': [
'bob_compute_perf.py = bob.measure.script.compute_perf:main',
'bob_eval_threshold.py = bob.measure.script.eval_threshold:main',
'bob_apply_threshold.py = bob.measure.script.apply_threshold:main',
'bob_plot_cmc.py = bob.measure.script.plot_cmc:main',
# main entry for bob measure cli
'bob.cli': [
'measure = bob.measure.script.measure:measure',
],
# bob measure scripts
'bob.measure.cli': [
'evaluate = bob.measure.script.evaluate:evaluate',
],
},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment