Commit a56c4d88 authored by Amir MOHAMMADI

Helpers for creating common commands

parent a143c51f
''' Click commands for ``bob.measure`` '''
import click
from .. import load
from . import figure
from . import common_options
from bob.extension.scripts.click_helper import (verbosity_option,
open_file_mode_option)


SCORE_FORMAT = (
    "The command takes as input generic 2-column data format as "
    "specified in the documentation of "
    ":py:func:`bob.measure.load.split`")
CRITERIA = ('eer', 'min-hter', 'far')


@common_options.metrics_command(common_options.METRICS_HELP.format(
    names='FtA, FAR, FRR, FMR, FNMR, HTER',
    criteria=CRITERIA, score_format=SCORE_FORMAT,
    command='bob measure metrics'), criteria=CRITERIA)
def metrics(ctx, scores, evaluation, **kwargs):
    process = figure.Metrics(ctx, scores, evaluation, load.split)
    process.run()


@common_options.roc_command(
    common_options.ROC_HELP.format(
        score_format=SCORE_FORMAT, command='bob measure roc'))
def roc(ctx, scores, evaluation, **kwargs):
    process = figure.Roc(ctx, scores, evaluation, load.split)
    process.run()


@common_options.det_command(
    common_options.DET_HELP.format(
        score_format=SCORE_FORMAT, command='bob measure det'))
def det(ctx, scores, evaluation, **kwargs):
    process = figure.Det(ctx, scores, evaluation, load.split)
    process.run()


@common_options.epc_command(
    common_options.EPC_HELP.format(
        score_format=SCORE_FORMAT, command='bob measure epc'))
def epc(ctx, scores, **kwargs):
    process = figure.Epc(ctx, scores, True, load.split)
    process.run()


@common_options.hist_command(
    common_options.HIST_HELP.format(
        score_format=SCORE_FORMAT, command='bob measure hist'))
def hist(ctx, scores, evaluation, **kwargs):
    process = figure.Hist(ctx, scores, evaluation, load.split)
    process.run()


@common_options.evaluate_command(
    common_options.EVALUATE_HELP.format(
        score_format=SCORE_FORMAT, command='bob measure evaluate'),
    criteria=CRITERIA)
def evaluate(ctx, scores, evaluation, **kwargs):
    common_options.evaluate_flow(
        ctx, scores, evaluation, metrics, roc, det, epc, hist, **kwargs)
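

# Hedged usage sketch (not part of this commit): the commands above are
# plain click commands, so they can be exercised programmatically, e.g. in
# a quick smoke test. The 'scores-dev' path is a placeholder for an
# existing 2-column score file.
def _smoke_test_metrics(score_file='scores-dev'):
    from click.testing import CliRunner
    runner = CliRunner()
    result = runner.invoke(metrics, ['-v', score_file])
    assert result.exit_code == 0, result.output
    return result.output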
'''Stores click common options for plots'''
import functools
import logging
import click
from click.types import INT, FLOAT
import matplotlib.pyplot as plt
import tabulate
from matplotlib.backends.backend_pdf import PdfPages
from bob.extension.scripts.click_helper import (
    bool_option, list_float_option, verbosity_option, open_file_mode_option)
LOGGER = logging.getLogger(__name__)
@@ -567,3 +569,353 @@ def style_option(**kwargs):
'multiple styles by repeating this option',
callback=callback, **kwargs)(func)
return custom_style_option
def metrics_command(docstring, criteria=('eer', 'min-hter', 'far')):
def custom_metrics_command(func):
func.__doc__ = docstring
@click.command()
@scores_argument(nargs=-1)
@eval_option()
@table_option()
@output_log_metric_option()
@criterion_option(criteria)
@thresholds_option()
@far_option()
@legends_option()
@open_file_mode_option()
@verbosity_option()
@click.pass_context
@functools.wraps(func)
def wrapper(*args, **kwds):
return func(*args, **kwds)
return wrapper
return custom_metrics_command
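

# Hedged usage sketch (illustrative, not part of this commit): building a
# metrics command from the factory above, the way the ``bob measure
# metrics`` command module does. The command name, score-format text and
# module paths below are placeholders/assumptions.
def _example_metrics_command():
    from bob.measure import load
    from bob.measure.script import figure

    @metrics_command(
        METRICS_HELP.format(
            names='FtA, FMR, FNMR, HTER', criteria=('eer', 'min-hter'),
            score_format='2-column score files',
            command='my-tool metrics'),
        criteria=('eer', 'min-hter'))
    def my_metrics(ctx, scores, evaluation, **kwargs):
        figure.Metrics(ctx, scores, evaluation, load.split).run()

    return my_metrics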
METRICS_HELP = """Prints a table that contains {names} for a given
threshold criterion ({criteria}).
You need to provide one or more development score file(s) for each
experiment. You can also provide evaluation files along with dev files. If
evaluation scores are provided, you must use the `--eval` flag.
{score_format}
The resulting table format can be changed using the `--tablefmt` option.
Examples:
$ {command} -v scores-dev
$ {command} -e -l results.txt sys1/scores-{{dev,eval}}
$ {command} -e {{sys1,sys2}}/scores-{{dev,eval}}
"""
def roc_command(docstring):
def custom_roc_command(func):
func.__doc__ = docstring
@click.command()
@scores_argument(nargs=-1)
@titles_option()
@legends_option()
@no_legend_option()
@legend_loc_option(dflt=None)
@sep_dev_eval_option()
@output_plot_file_option(default_out='roc.pdf')
@eval_option()
@semilogx_option(True)
@lines_at_option()
@axes_val_option()
@min_far_option()
@x_rotation_option()
@x_label_option()
@y_label_option()
@points_curve_option()
@const_layout_option()
@figsize_option()
@style_option()
@linestyles_option()
@verbosity_option()
@click.pass_context
@functools.wraps(func)
def wrapper(*args, **kwds):
return func(*args, **kwds)
return wrapper
return custom_roc_command
ROC_HELP = """Plot ROC (receiver operating characteristic) curve.
The plot will represent the false match rate on the horizontal axis and the
false non-match rate on the vertical axis. The values for the axes are
computed using :py:func:`bob.measure.roc`.
You need to provide one or more development score file(s) for each
experiment. You can also provide evaluation files along with dev files. If
evaluation scores are provided, you must use the `--eval` flag.
{score_format}
Examples:
$ {command} -v scores-dev
$ {command} -e -v sys1/scores-{{dev,eval}}
$ {command} -e -v -o my_roc.pdf {{sys1,sys2}}/scores-{{dev,eval}}
"""
def det_command(docstring):
def custom_det_command(func):
func.__doc__ = docstring
@click.command()
@scores_argument(nargs=-1)
@output_plot_file_option(default_out='det.pdf')
@titles_option()
@legends_option()
@no_legend_option()
@legend_loc_option(dflt='upper-right')
@sep_dev_eval_option()
@eval_option()
@axes_val_option(dflt='0.01,95,0.01,95')
@min_far_option()
@x_rotation_option(dflt=45)
@x_label_option()
@y_label_option()
@points_curve_option()
@lines_at_option()
@const_layout_option()
@figsize_option()
@style_option()
@linestyles_option()
@verbosity_option()
@click.pass_context
@functools.wraps(func)
def wrapper(*args, **kwds):
return func(*args, **kwds)
return wrapper
return custom_det_command
DET_HELP = """Plot DET (detection error trade-off) curve.
modified ROC curve which plots error rates on both axes
(false positives on the x-axis and false negatives on the y-axis).
You need to provide one or more development score file(s) for each
experiment. You can also provide evaluation files along with dev files. If
evaluation scores are provided, you must use flag `--eval`.
{score_format}
Examples:
$ {command} -v scores-dev
$ {command} -e -v sys1/scores-{{dev,eval}}
$ {command} -e -v -o my_det.pdf {{sys1,sys2}}/scores-{{dev,eval}}
"""
def epc_command(docstring):
def custom_epc_command(func):
func.__doc__ = docstring
@click.command()
@scores_argument(min_arg=1, force_eval=True, nargs=-1)
@output_plot_file_option(default_out='epc.pdf')
@titles_option()
@legends_option()
@no_legend_option()
@legend_loc_option(dflt='upper-center')
@points_curve_option()
@const_layout_option()
@x_label_option()
@y_label_option()
@figsize_option()
@style_option()
@linestyles_option()
@verbosity_option()
@click.pass_context
@functools.wraps(func)
def wrapper(*args, **kwds):
return func(*args, **kwds)
return wrapper
return custom_epc_command
EPC_HELP = """Plot EPC (expected performance curve).
plots the error rate on the eval set depending on a threshold selected
a-priori on the development set and accounts for varying relative cost
in [0; 1] of FPR and FNR when calculating the threshold.
You need to provide one or more development score and eval file(s)
for each experiment.
{score_format}
Examples:
$ {command} -v scores-{{dev,eval}}
$ {command} -v -o my_epc.pdf {{sys1,sys2}}/scores-{{dev,eval}}
"""
def hist_command(docstring):
def custom_hist_command(func):
func.__doc__ = docstring
@click.command()
@scores_argument(nargs=-1)
@output_plot_file_option(default_out='hist.pdf')
@eval_option()
@hide_dev_option()
@n_bins_option()
@legends_option()
@no_legend_option()
@legend_ncols_option()
@criterion_option()
@far_option()
@no_line_option()
@thresholds_option()
@subplot_option()
@const_layout_option()
@print_filenames_option()
@figsize_option(dflt=None)
@style_option()
@verbosity_option()
@click.pass_context
@functools.wraps(func)
def wrapper(*args, **kwds):
return func(*args, **kwds)
return wrapper
return custom_hist_command
HIST_HELP = """ Plots histograms of positive and negatives along with threshold
criterion.
You need to provide one or more development score file(s) for each
experiment. You can also provide evaluation files along with dev files. If
evaluation scores are provided, you must use the `--eval` flag. The
threshold is always computed from development score files.
By default, when eval scores are given, only the eval-score histograms are
displayed, with the threshold line computed from the dev scores.
{score_format}
Examples:
$ {command} -v scores-dev
$ {command} -e -v sys1/scores-{{dev,eval}}
$ {command} -e -v --criterion min-hter {{sys1,sys2}}/scores-{{dev,eval}}
"""
def evaluate_command(docstring, criteria=('eer', 'min-hter', 'far')):
def custom_evaluate_command(func):
func.__doc__ = docstring
@click.command()
@scores_argument(nargs=-1)
@legends_option()
@sep_dev_eval_option()
@table_option()
@eval_option()
@criterion_option(criteria)
@far_option()
@output_log_metric_option()
@output_plot_file_option(default_out='eval_plots.pdf')
@lines_at_option()
@points_curve_option()
@const_layout_option()
@figsize_option(dflt=None)
@style_option()
@linestyles_option()
@verbosity_option()
@click.pass_context
@functools.wraps(func)
def wrapper(*args, **kwds):
return func(*args, **kwds)
return wrapper
return custom_evaluate_command
EVALUATE_HELP = '''Runs error analysis on score sets.
\b
1. Computes the threshold using a criterion (EER by default) on the
development set scores
2. Applies the above threshold to the evaluation set scores to compute
the HTER, if an eval-score set is provided (use --eval).
3. Reports error rates on the console or in a log file.
4. Plots ROC, DET, and EPC curves and score distributions to a multi-page
PDF file
You need to provide 1 or 2 score files for each biometric system in this
order:
\b
* development scores
* evaluation scores
{score_format}
Examples:
$ {command} -v dev-scores
$ {command} -v /path/to/sys-{{1,2,3}}/scores-dev
$ {command} -e -v /path/to/sys-{{1,2,3}}/scores-{{dev,eval}}
$ {command} -v -l metrics.txt -o my_plots.pdf dev-scores
This command is a combination of metrics, roc, det, epc, and hist commands.
If you want more flexibility in your plots, please use the individual
commands.
'''
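

# Hedged illustration (not part of this commit) of steps 1 and 2 described
# in EVALUATE_HELP, using bob.measure's API directly. The score-file paths
# are placeholders and ``load.split`` is assumed to return the
# (negatives, positives) arrays of a 2-column score file.
def _threshold_then_hter(dev_file='scores-dev', eval_file='scores-eval'):
    from bob.measure import eer_threshold, farfrr
    from bob.measure import load
    dev_neg, dev_pos = load.split(dev_file)
    eval_neg, eval_pos = load.split(eval_file)
    threshold = eer_threshold(dev_neg, dev_pos)  # 1. criterion on dev scores
    far, frr = farfrr(eval_neg, eval_pos, threshold)  # 2. applied on eval
    return (far + frr) / 2.0  # HTER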
def evaluate_flow(ctx, scores, evaluation, metrics, roc, det, epc, hist,
**kwargs):
# open_mode is always write in this command.
ctx.meta['open_mode'] = 'w'
criterion = ctx.meta.get('criterion')
if criterion is not None:
click.echo("Computing metrics with %s..." % criterion)
ctx.invoke(metrics, scores=scores, evaluation=evaluation)
if 'log' in ctx.meta:
click.echo("[metrics] => %s" % ctx.meta['log'])
# avoid closing pdf file before all figures are plotted
ctx.meta['closef'] = False
if evaluation:
click.echo("Starting evaluate with dev and eval scores...")
else:
click.echo("Starting evaluate with dev scores only...")
click.echo("Computing ROC...")
# set axes limits for ROC
ctx.forward(roc) # use class defaults plot settings
click.echo("Computing DET...")
ctx.forward(det) # use class defaults plot settings
if evaluation:
click.echo("Computing EPC...")
ctx.forward(epc) # use class defaults plot settings
# the last one closes the file
ctx.meta['closef'] = True
click.echo("Computing score histograms...")
ctx.meta['criterion'] = 'eer' # no criterion passed in evaluate
ctx.forward(hist)
click.echo("Evaluate successfully completed!")
click.echo("[plots] => %s" % (ctx.meta['output']))
@@ -160,8 +160,10 @@ class Metrics(MeasureBase):
output stream
'''
def __init__(self, ctx, scores, evaluation, func_load,
             names=('FtA', 'FMR', 'FNMR', 'FAR', 'FRR', 'HTER')):
super(Metrics, self).__init__(ctx, scores, evaluation, func_load)
self.names = names
self._tablefmt = ctx.meta.get('tablefmt')
self._criterion = ctx.meta.get('criterion')
self._open_mode = ctx.meta.get('open_mode')
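

# Hedged sketch (not part of this commit): the new ``names`` argument lets a
# caller relabel the reported rows without touching the table logic. The
# subclass and labels below are illustrative only and would live outside the
# Metrics class body.
class _RenamedMetrics(Metrics):
    def __init__(self, ctx, scores, evaluation, func_load):
        super(_RenamedMetrics, self).__init__(
            ctx, scores, evaluation, func_load,
            names=('FtA', 'APCER', 'BPCER', 'FAR', 'FRR', 'HTER'))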
@@ -225,12 +227,12 @@ class Metrics(MeasureBase):
dev_frr_str = "%.1f%%" % (100 * dev_frr)
dev_hter_str = "%.1f%%" % (100 * dev_hter)
headers = ['' or title, 'Development %s' % dev_file]
raws = [[self.names[0], dev_fta_str],
        [self.names[1], dev_fmr_str],
        [self.names[2], dev_fnmr_str],
        [self.names[3], dev_far_str],
        [self.names[4], dev_frr_str]