Skip to content
Snippets Groups Projects
Commit 76ad3291 authored by Theophile GENTILHOMME's avatar Theophile GENTILHOMME
Browse files

Refactor some of the code and add functionalities

parent 82fd9990
No related branches found
No related tags found
2 merge requests: !54 "Refactors the score loading and scripts functionality", !52 "generic plotting script for bob measure"
......@@ -10,7 +10,6 @@ from bob.extension.scripts.click_helper import verbosity_option
logger = logging.getLogger(__name__)
def scores_argument(test_mandatory=False, **kwargs):
'''Get the argument for scores, and add `dev-scores` and `test-scores` in
the context if `--test` flag is on (default `--no-test`).'''
......@@ -20,8 +19,10 @@ def scores_argument(test_mandatory=False, **kwargs):
if length < 1:
raise click.BadParameter('No scores provided', ctx=ctx)
else:
div = 1
ctx.meta['scores'] = value
if test_mandatory or ctx.meta['test']:
div = 2
if (length % 2) != 0:
pref = 'T' if test_mandatory else ('When `--test` flag'
' is on t')
......@@ -34,8 +35,16 @@ def scores_argument(test_mandatory=False, **kwargs):
ctx.meta['test-scores'] = [value[i] for i in
range(length) if i % 2]
ctx.meta['n_sys'] = len(ctx.meta['test-scores'])
if 'titles' in ctx.meta and \
len(ctx.meta['titles']) != len(value) / div:
raise click.BadParameter(
'#titles not equal to #sytems', ctx=ctx
)
return value
return click.argument('scores', callback=callback, **kwargs)(func)
return click.argument(
'scores', type=click.Path(exists=True),
callback=callback, **kwargs
)(func)
return custom_scores_argument
def test_option(**kwargs):
......@@ -47,9 +56,24 @@ def test_option(**kwargs):
return click.option(
'-t', '--test/--no-test', default=False,
help='If set, test scores must be provided',
show_default=True,
callback=callback, is_eager=True ,**kwargs)(func)
return custom_test_option
def sep_dev_test_option(**kwargs):
    '''Build a decorator that adds the `--split/--no-split` flag to a command.

    The flag says whether development and test curves go to different plots.
    Its value is also copied to ``ctx.meta['split']`` by the option callback.
    Every keyword argument is forwarded untouched to ``click.option``.
    '''
    def custom_sep_dev_test_option(func):
        def _store_split(ctx, param, value):
            # keep the flag reachable from the click context
            ctx.meta['split'] = value
            return value

        add_option = click.option(
            '-s', '--split/--no-split',
            default=True,
            show_default=True,
            help='If set, test and dev curve in different plots',
            callback=_store_split,
            is_eager=True,
            **kwargs
        )
        return add_option(func)
    return custom_sep_dev_test_option
def n_sys_option(**kwargs):
'''Get the number of systems to be processed'''
def custom_n_sys_option(func):
......@@ -96,8 +120,6 @@ def n_bins_option(**kwargs):
callback=callback, **kwargs)(func)
return custom_n_bins_option
@click.option('-n', '--points', type=INT, default=100, show_default=True,
help='Number of points to use in the curves')
def table_option(**kwargs):
'''Get table option for tabulate package
More informnations: https://pypi.python.org/pypi/tabulate
......@@ -187,6 +209,19 @@ def criterion_option(**kwargs):
callback=callback, is_eager=True ,**kwargs)(func)
return custom_criterion_option
def threshold_option(**kwargs):
    '''Build a decorator that adds the `--thres` float option to a command.

    The option defaults to ``None``. Whatever value is parsed is also copied
    to ``ctx.meta['thres']`` by the option callback. Every keyword argument
    is forwarded untouched to ``click.option``.
    '''
    def custom_threshold_option(func):
        def _store_threshold(ctx, param, value):
            # keep the threshold reachable from the click context
            ctx.meta['thres'] = value
            return value

        add_option = click.option(
            '--thres',
            type=click.FLOAT,
            default=None,
            help='Given threshold for metrics computations',
            callback=_store_threshold,
            show_default=True,
            **kwargs
        )
        return add_option(func)
    return custom_threshold_option
def label_option(name_option='x-label', **kwargs):
'''Get labels options based on the given name.
......@@ -288,11 +323,13 @@ def titles_option(**kwargs):
'''Get the titles otpion for the different systems'''
def custom_titles_option(func):
def callback(ctx, param, value):
ctx.meta['titles'] = value if value is None else \
value.split(',')
if value is not None:
value = value.split(',')
ctx.meta['titles'] = value
return value
return click.option(
'--titles', help='The title for each system comma separated. '
'--titles', type=click.STRING, default=None,
help='The title for each system comma separated. '
'Example: --titles ISV,CNN',
callback=callback, **kwargs)(func)
return custom_titles_option
......
......@@ -61,9 +61,10 @@ def _process_scores(dev_path, test_path, test):
if test_neg is None:
raise click.UsageError("While loading test-score file %s" %\
test_path)
if test_file is not None:
test_file = test_file.split(".")[0]
return (dev_neg, dev_pos, dev_fta, test_neg, test_pos,
test_fta, dev_file, test_file)
test_fta, dev_file.split(".")[0], test_file)
def _get_thres(criter, neg, pos):
'''Computes threshold from the given criterion and pos/neg scores'''
......@@ -81,6 +82,35 @@ def _get_scores(ctx, scores=None):
test = None if 'test-scores' not in ctx.meta else ctx.meta['test-scores']
return (dev, test)
def _get_colors(n):
if n > 10:
cmap = mpl.cm.get_cmap(name='magma')
return [cmap(i) for i in numpy.linspace(0, 1.0, n + 1)]
# matplotlib 2.0 default color cycler list: Vega category10 palette
return ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
'#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
'#bcbd22', '#17becf']
# Dash patterns cycled over by the plotting commands, one per system.
# Each entry is an `(offset, (on, off, ...))` tuple.
# The plotting commands in this file pick a style with
# `LINESTYLES[idx % 14]`, so the table must hold exactly 14 entries:
# with fewer, the 14th system would raise an IndexError. New patterns must
# be appended only, so that existing indices keep their style.
LINESTYLES = [
    (0, ()),                     # solid
    (0, (4, 4)),                 # dashed
    (0, (1, 5)),                 # dotted
    (0, (3, 5, 1, 5)),           # dashdotted
    (0, (3, 5, 1, 5, 1, 5)),     # dashdotdotted
    (0, (5, 1)),                 # densely dashed
    (0, (1, 1)),                 # densely dotted
    (0, (3, 1, 1, 1)),           # densely dashdotted
    (0, (3, 1, 1, 1, 1, 1)),     # densely dashdotdotted
    (0, (5, 10)),                # loosely dashed
    (0, (3, 10, 1, 10)),         # loosely dashdotted
    (0, (3, 10, 1, 10, 1, 10)),  # loosely dashdotdotted
    (0, (1, 10)),                # loosely dotted
    (0, (10, 3)),                # long dashed (14th entry, see note above)
]
@click.command()
@common_options.scores_argument(nargs=-1)
@common_options.table_option()
......@@ -88,14 +118,15 @@ def _get_scores(ctx, scores=None):
@common_options.open_file_mode_option()
@common_options.output_plot_metric_option()
@common_options.criterion_option()
@common_options.threshold_option()
@verbosity_option()
@click.pass_context
def metrics(ctx, criter, scores, log, test, open_mode,
def metrics(ctx, criter, scores, log, test, open_mode, thres,
tablefmt='fancy_grid', **kargs):
"""Prints a single output line that contains all info for a given
criterion (eer or hter).
You need provide one or more development score file(s) for each experiment.
You need provide one or more development score file(s) for each experiment.
You can also provide test files along with dev files but the flag `--test`
is required in that case.
......@@ -106,10 +137,9 @@ def metrics(ctx, criter, scores, log, test, open_mode,
Examples:
$ bob measure metrics dev-scores
$ bob measure metrics --test dev-scores1 test-scores1 dev-scores2
test-scores2
$ bob measure metrics --test -l results.txt dev-scores1 test-scores1
$ bob measure metrics --test {dev,test}-scores1 {dev,test}-scores2
"""
if log is not None:
......@@ -123,15 +153,22 @@ def metrics(ctx, criter, scores, log, test, open_mode,
dev_neg, dev_pos, dev_fta, test_neg, test_pos,\
test_fta, dev_file, test_file =\
_process_scores(dev_path, test_path, test)
thres = _get_thres(criter, dev_neg, dev_pos)
threshold = _get_thres(criter, dev_neg, dev_pos) if thres is None else\
thres
if thres is None:
click.echo("[Min. criterion: %s] Threshold on Development set `%s`: %e"\
% (criter.upper(), dev_file, threshold), file=log_file)
else:
click.echo("[Min. criterion: user provider] Threshold on"
"Development set `%s`: %e"\
% (dev_file, threshold), file=log_file)
from .. import farfrr
dev_fmr, dev_fnmr = farfrr(dev_neg, dev_pos, thres)
dev_fmr, dev_fnmr = farfrr(dev_neg, dev_pos, threshold)
dev_far = dev_fmr * (1 - dev_fta)
dev_frr = dev_fta + dev_fnmr * (1 - dev_fta)
dev_hter = (dev_far + dev_frr) / 2.0
click.echo("[Min. criterion: %s] Threshold on Development set `%s`: %e"\
% (criter.upper(), dev_file, thres), file=log_file)
dev_ni = dev_neg.shape[0] # number of impostors
dev_fm = int(round(dev_fmr * dev_ni)) # number of false accepts
......@@ -152,7 +189,7 @@ def metrics(ctx, criter, scores, log, test, open_mode,
if test and test_neg is not None:
# computes statistics for the test set based on the threshold a priori
test_fmr, test_fnmr = farfrr(test_neg, test_pos, thres)
test_fmr, test_fnmr = farfrr(test_neg, test_pos, threshold)
test_far = test_fmr * (1 - test_fta)
test_frr = test_fta + test_fnmr * (1 - test_fta)
test_hter = (test_far + test_frr) / 2.0
......@@ -194,18 +231,52 @@ def _end_pp(ctx, pp):
if 'closef' not in ctx.meta or ctx.meta['closef']:
pp.close()
def _label(base, name, idx, multi=False, legend = None):
if legend is not None and len(legend) > idx:
return legend[idx]
if multi:
return base + (" %d (%s)" % (idx + 1, name))
return base + (" (%s)" % name)
def _get_title(base, idx, split, test):
states = ['Development', 'Evaluation']
if not test:
return base + (" (%s)" % states[0])
if split:
return base + (" (%s)" % states[idx])
return base
def _setup_plot(pp, title_base, axis=None, x_label='FMR (%)', y_label='FNMR (%)',
                grid_color='silver', test=False, split=False, legend=False):
    '''Decorate the figure(s) that were drawn and save them through `pp`.

    Two figures (numbers 1 and 2) are handled when both `test` and `split`
    are set, a single one (number 1) otherwise. Each figure gets its axis
    limits (if `axis` is given), title, axis labels and grid, plus a legend
    when `legend` is set or when dev and test curves share one plot. The
    figure is then handed to `pp.savefig`.

    NOTE(review): `mpl` is presumably `matplotlib.pyplot` and `pp` a PDF
    backend object -- confirm against the module imports.
    '''
    nb_figs = 2 if (test and split) else 1
    show_legend = legend or (test and not split)
    for fig_number in range(1, nb_figs + 1):
        # selects the figure so that the `mpl` calls below apply to it
        fig = mpl.figure(fig_number)
        if axis is not None:
            mpl.axis(axis)
        mpl.title(_get_title(title_base, fig_number - 1, split, test))
        mpl.xlabel(x_label)
        mpl.ylabel(y_label)
        mpl.grid(True, color=grid_color)
        if show_legend:
            mpl.legend()
        # gives warning when applied with mpl
        fig.set_tight_layout(True)
        pp.savefig(fig)
@click.command()
@common_options.scores_argument(nargs=-1)
@common_options.titles_option()
@common_options.sep_dev_test_option()
@common_options.output_plot_file_option(default_out='roc.pdf')
@common_options.test_option()
@common_options.points_curve_option()
@verbosity_option()
@click.pass_context
def roc(ctx, output, scores, test, points, **kargs):
def roc(ctx, output, scores, test, points, titles, split, **kargs):
"""Plot ROC (receiver operating characteristic) curve:
plot of the rate of false positives (i.e. impostor attempts accepted) on the
x-axis against the corresponding rate of true positives (i.e. genuine attempts
accepted) on the y-axis plotted parametrically as a function of a decision
accepted) on the y-axis parametrically as a function of a decision
threshold
You need provide one or more development score file(s) for each experiment.
......@@ -223,41 +294,46 @@ def roc(ctx, output, scores, test, points, **kargs):
pp = _mplt_setup(ctx, output)
dev_scores, test_scores = _get_scores(ctx, scores)
multi_plots = len(dev_scores) > 1
colors = _get_colors(len(dev_scores))
for idx, dev_path in enumerate(dev_scores):
test_path = test_scores[idx] if test_scores is not None else None
dev_neg, dev_pos, _, test_neg, test_pos,\
_, dev_file, test_file =\
_process_scores(dev_path, test_path, test)
fig = mpl.figure()
mpl.figure(1)
if test:
plot.roc(dev_neg, dev_pos, points, color=(0.3, 0.3, 0.3),
linestyle='--', dashes=(6, 2), label='development')
plot.roc(test_neg, test_pos, points, color=(0, 0, 0),
linestyle='-', label='test')
linestyle = '-' if not split else LINESTYLES[idx % 14]
plot.roc(dev_neg, dev_pos, points, color=colors[idx],
linestyle=linestyle,
label=_label('development', dev_file, idx,
multi_plots, titles))
if split:
mpl.figure(2)
linestyle = '--' if not split else LINESTYLES[idx % 14]
plot.roc(test_neg, test_pos, points, color=colors[idx],
linestyle=linestyle,
label=_label('test', test_file, idx,
multi_plots, titles))
else:
plot.roc(dev_neg, dev_pos, points, color=(0, 0, 0),
linestyle='-', label='development')
title = dev_file + (" / %s" % test_file if test else "")
mpl.axis([0, 40, 0, 40])
mpl.title("ROC Curve (%s)" % title)
mpl.xlabel('FMR (%)')
mpl.ylabel('FNMR (%)')
mpl.grid(True, color=(0.3, 0.3, 0.3))
if test:
mpl.legend()
pp.savefig(fig)
plot.roc(dev_neg, dev_pos, points, color=colors[idx],
linestyle=LINESTYLES[idx % 14],
label=_label('development', dev_file, idx,
multi_plots, titles))
_setup_plot(pp, title_base="ROC Curve", axis=[0, 40, 0, 40], test=test,
split=split, legend=multi_plots)
_end_pp(ctx, pp)
@click.command()
@common_options.scores_argument(nargs=-1)
@common_options.output_plot_file_option(default_out='det.pdf')
@common_options.titles_option()
@common_options.sep_dev_test_option()
@common_options.test_option()
@common_options.points_curve_option()
@verbosity_option()
@click.pass_context
def det(ctx, output, scores, test, points, **kargs):
def det(ctx, output, scores, test, points, titles, split, **kargs):
"""Plot DET (detection error trade-off) curve:
modified ROC curve which plots error rates on both axes
(false positives on the x-axis and false negatives on the y-axis)
......@@ -277,39 +353,50 @@ def det(ctx, output, scores, test, points, **kargs):
pp = _mplt_setup(ctx, output)
dev_scores, test_scores = _get_scores(ctx, scores)
multi_plots = len(dev_scores) > 1
colors = _get_colors(len(dev_scores))
for idx, dev_path in enumerate(dev_scores):
test_path = test_scores[idx] if test_scores is not None else None
dev_neg, dev_pos, _, test_neg, test_pos,\
_, dev_file, test_file =\
_process_scores(dev_path, test_path, test)
fig = mpl.figure()
mpl.figure(1)
if test and test_neg is not None:
plot.det(dev_neg, dev_pos, points, color=(0.3, 0.3, 0.3),
linestyle='--', dashes=(6, 2), label='development')
plot.det(test_neg, test_pos, points, color=(0, 0, 0),
linestyle='-', label='test')
linestyle = '-' if not split else LINESTYLES[idx % 14]
plot.det(dev_neg, dev_pos, points, color=colors[idx],
linestyle=linestyle,
label=_label('development', dev_file, idx,
multi_plots, titles))
if split:
mpl.figure(2)
linestyle = '--' if not split else LINESTYLES[idx % 14]
plot.det(test_neg, test_pos, points, color=colors[idx],
linestyle=linestyle,
label=_label('test', test_file, idx,
multi_plots, titles))
else:
plot.det(dev_neg, dev_pos, points, color=(0, 0, 0),
linestyle='-', label='development')
plot.det(dev_neg, dev_pos, points, color=colors[idx],
linestyle=LINESTYLES[idx % 14],
label=_label('development', dev_file, idx,
multi_plots, titles))
mpl.figure(1)
plot.det_axis([0.01, 40, 0.01, 40])
if test and split:
mpl.figure(2)
plot.det_axis([0.01, 40, 0.01, 40])
title = dev_file + (" / %s" % test_file if test else "")
mpl.title("DET Curve (%s)" % title)
mpl.xlabel('FMR (%)')
mpl.ylabel('FNMR (%)')
mpl.grid(True, color=(0.3, 0.3, 0.3))
if test:
mpl.legend()
pp.savefig(fig)
_setup_plot(pp, title_base="DET Curve", test=test,
split=split, legend=multi_plots)
_end_pp(ctx, pp)
@click.command()
@common_options.scores_argument(test_mandatory=True, nargs=-1)
@common_options.output_plot_file_option(default_out='epc.pdf')
@common_options.titles_option()
@common_options.points_curve_option()
@verbosity_option()
@click.pass_context
def epc(ctx, output, points, **kargs):
def epc(ctx, output, points, titles, **kargs):
"""Plot EPC (expected performance curve):
plots the error rate on the test set depending on a threshold selected
a-priori on the development set and accounts for varying relative cost β
......@@ -329,19 +416,19 @@ def epc(ctx, output, points, **kargs):
raise click.UsageError("EPC requires dev and test score files")
dev_scores, test_scores = _get_scores(ctx)
for dev_path, test_path in zip(dev_scores, test_scores):
multi_plots = len(dev_scores) > 1
colors = _get_colors(len(dev_scores))
mpl.figure(1)
for idx, (dev_path, test_path) in enumerate(zip(dev_scores, test_scores)):
dev_neg, dev_pos, _, test_neg, test_pos,\
_, dev_file, test_file =\
_process_scores(dev_path, test_path, True)
fig = mpl.figure()
plot.epc(dev_neg, dev_pos, test_neg, test_pos, points,
color=(0, 0, 0), linestyle='-')
title = dev_file + " / " + test_file
mpl.title('EPC Curve (%s)' % title)
mpl.xlabel('Cost')
mpl.ylabel('Min. HTER (%)')
mpl.grid(True, color=(0.3, 0.3, 0.3))
pp.savefig(fig)
color=colors[idx], linestyle=LINESTYLES[idx % 14],
label=_label('curve', dev_file + "_" + test_file,
idx, multi_plots, titles))
_setup_plot(pp, title_base="EPC", x_label='Cost', y_label='Min. HTER (%)',
test=True, legend=True)
_end_pp(ctx, pp)
@click.command()
......@@ -421,6 +508,8 @@ def hist(ctx, output, scores, criter, test, nbins, **kargs):
@click.command()
@common_options.scores_argument(nargs=-1)
@common_options.titles_option()
@common_options.sep_dev_test_option()
@common_options.table_option()
@common_options.test_option()
@common_options.output_plot_metric_option()
......@@ -429,7 +518,7 @@ def hist(ctx, output, scores, criter, test, nbins, **kargs):
@common_options.n_bins_option()
@verbosity_option()
@click.pass_context
def evaluate(ctx, scores, tablefmt, test, log, **kargs):
def evaluate(ctx, scores, tablefmt, test, output, log, **kargs):
'''Runs error analysis on score sets
1. Computes the threshold using either EER or min. HTER criteria on
development set scores
......@@ -458,6 +547,8 @@ def evaluate(ctx, scores, tablefmt, test, log, **kargs):
click.echo("Computing metrics with HTER...")
ctx.invoke(metrics, criter='hter', scores=scores, log=log,
test=test, open_mode='a', tablefmt=tablefmt)
if log is not None:
click.echo("[metrics] => %s" % log)
#avoid closing pdf file before all figures are plotted
ctx.meta['closef'] = False
......@@ -477,4 +568,5 @@ def evaluate(ctx, scores, tablefmt, test, log, **kargs):
click.echo("Computing score histograms...")
ctx.forward(hist)
click.echo("Evaluate successfully completed!")
click.echo("[plots] => %s" % output)
return 0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment