diff --git a/bob/measure/script/commands.py b/bob/measure/script/commands.py
index 4d7550ef43c2cd27e512e9e02f49e8c511a1d599..88852eedc928d79db9bba7782ad6810c2ff07704 100644
--- a/bob/measure/script/commands.py
+++ b/bob/measure/script/commands.py
@@ -1,289 +1,61 @@
 ''' Click commands for ``bob.measure`` '''
-
-import click
 from .. import load
 from . import figure
 from . import common_options
-from bob.extension.scripts.click_helper import (verbosity_option,
-                                                open_file_mode_option)
-
-
-@click.command()
-@common_options.scores_argument(nargs=-1)
-@common_options.eval_option()
-@common_options.table_option()
-@common_options.output_log_metric_option()
-@common_options.criterion_option()
-@common_options.thresholds_option()
-@common_options.far_option()
-@common_options.legends_option()
-@open_file_mode_option()
-@verbosity_option()
-@click.pass_context
-def metrics(ctx, scores, evaluation, **kwargs):
-    """Prints a table that contains FtA, FAR, FRR, FMR, FMNR, HTER for a given
-    threshold criterion (eer or min-hter).
-
-    You need to provide one or more development score file(s) for each
-    experiment. You can also provide evaluation files along with dev files. If
-    evaluation scores are provided, you must use flag `--eval`.
-    Resulting table format can be changed using the `--tablefmt`.
-    Examples:
-        $ bob measure metrics dev-scores
+SCORE_FORMAT = (
+    "The command takes as input a generic 2-column data format as "
+    "specified in the documentation of " ":py:func:`bob.measure.load.split`")
+CRITERIA = ('eer', 'min-hter', 'far')
 
-        $ bob measure metrics -e -l results.txt dev-scores1 eval-scores1
 
-        $ bob measure metrics -e {dev,eval}-scores1 {dev,eval}-scores2
-    """
+@common_options.metrics_command(common_options.METRICS_HELP.format(
+    names='FtA, FAR, FRR, FMR, FNMR, HTER',
+    criteria=CRITERIA, score_format=SCORE_FORMAT,
+    command='bob measure metrics'), criteria=CRITERIA)
+def metrics(ctx, scores, evaluation, **kwargs):
     process = figure.Metrics(ctx, scores, evaluation, load.split)
     process.run()
 
 
-@click.command()
-@common_options.scores_argument(nargs=-1)
-@common_options.titles_option()
-@common_options.legends_option()
-@common_options.no_legend_option()
-@common_options.legend_loc_option(dflt=None)
-@common_options.sep_dev_eval_option()
-@common_options.output_plot_file_option(default_out='roc.pdf')
-@common_options.eval_option()
-@common_options.points_curve_option()
-@common_options.axes_val_option()
-@common_options.min_far_option()
-@common_options.x_rotation_option()
-@common_options.x_label_option()
-@common_options.y_label_option()
-@common_options.lines_at_option()
-@common_options.const_layout_option()
-@common_options.figsize_option()
-@common_options.style_option()
-@common_options.linestyles_option()
-@verbosity_option()
-@click.pass_context
+@common_options.roc_command(
+    common_options.ROC_HELP.format(
+        score_format=SCORE_FORMAT, command='bob measure roc'))
 def roc(ctx, scores, evaluation, **kwargs):
-    """Plot ROC (receiver operating characteristic) curve:
-    The plot will represent the false match rate on the horizontal axis and the
-    false non match rate on the vertical axis. The values for the axis will be
-    computed using :py:func:`bob.measure.roc`.
-
-    You need to provide one or more development score file(s) for each
-    experiment. You can also provide evaluation files along with dev files. If
-    evaluation scores are provided, you must use flag `--eval`.
-
-    Examples:
-        $ bob measure roc -v dev-scores
-
-        $ bob measure roc -e -v dev-scores1 eval-scores1 dev-scores2
-        eval-scores2
-
-        $ bob measure roc -e -v -o my_roc.pdf dev-scores1 eval-scores1
-    """
     process = figure.Roc(ctx, scores, evaluation, load.split)
     process.run()
 
 
-@click.command()
-@common_options.scores_argument(nargs=-1)
-@common_options.output_plot_file_option(default_out='det.pdf')
-@common_options.titles_option()
-@common_options.legends_option()
-@common_options.no_legend_option()
-@common_options.legend_loc_option(dflt='upper-right')
-@common_options.sep_dev_eval_option()
-@common_options.eval_option()
-@common_options.axes_val_option(dflt='0.01,95,0.01,95')
-@common_options.min_far_option()
-@common_options.x_rotation_option(dflt=45)
-@common_options.x_label_option()
-@common_options.y_label_option()
-@common_options.points_curve_option()
-@common_options.lines_at_option()
-@common_options.const_layout_option()
-@common_options.figsize_option()
-@common_options.style_option()
-@common_options.linestyles_option()
-@verbosity_option()
-@click.pass_context
+@common_options.det_command(
+    common_options.DET_HELP.format(
+        score_format=SCORE_FORMAT, command='bob measure det'))
 def det(ctx, scores, evaluation, **kwargs):
-    """Plot DET (detection error trade-off) curve:
-    modified ROC curve which plots error rates on both axes
-    (false positives on the x-axis and false negatives on the y-axis)
-
-    You need to provide one or more development score file(s) for each
-    experiment. You can also provide evaluation files along with dev files. If
-    evaluation scores are provided, you must use flag `--eval`.
-
-    Examples:
-        $ bob measure det -v dev-scores
-
-        $ bob measure det -e -v dev-scores1 eval-scores1 dev-scores2
-        eval-scores2
-
-        $ bob measure det -e -v -o my_det.pdf dev-scores1 eval-scores1
-    """
     process = figure.Det(ctx, scores, evaluation, load.split)
     process.run()
 
 
-@click.command()
-@common_options.scores_argument(min_arg=1, force_eval=True, nargs=-1)
-@common_options.output_plot_file_option(default_out='epc.pdf')
-@common_options.titles_option()
-@common_options.legends_option()
-@common_options.no_legend_option()
-@common_options.legend_loc_option(dflt='upper-center')
-@common_options.points_curve_option()
-@common_options.const_layout_option()
-@common_options.x_label_option()
-@common_options.y_label_option()
-@common_options.figsize_option()
-@common_options.style_option()
-@common_options.linestyles_option()
-@verbosity_option()
-@click.pass_context
+@common_options.epc_command(
+    common_options.EPC_HELP.format(
+        score_format=SCORE_FORMAT, command='bob measure epc'))
 def epc(ctx, scores, **kwargs):
-    """Plot EPC (expected performance curve):
-    plots the error rate on the eval set depending on a threshold selected
-    a-priori on the development set and accounts for varying relative cost
-    in [0; 1] of FPR and FNR when calculating the threshold.
-
-    You need to provide one or more development score and eval file(s)
-    for each experiment.
-
-    Examples:
-        $ bob measure epc -v dev-scores eval-scores
-
-        $ bob measure epc -v -o my_epc.pdf dev-scores1 eval-scores1
-    """
     process = figure.Epc(ctx, scores, True, load.split)
     process.run()
 
 
-@click.command()
-@common_options.scores_argument(nargs=-1)
-@common_options.output_plot_file_option(default_out='hist.pdf')
-@common_options.eval_option()
-@common_options.n_bins_option()
-@common_options.legends_option()
-@common_options.no_legend_option()
-@common_options.legend_ncols_option()
-@common_options.criterion_option()
-@common_options.hide_dev_option()
-@common_options.far_option()
-@common_options.no_line_option()
-@common_options.thresholds_option()
-@common_options.subplot_option()
-@common_options.const_layout_option()
-@common_options.print_filenames_option()
-@common_options.figsize_option(dflt=None)
-@common_options.style_option()
-@verbosity_option()
-@click.pass_context
+@common_options.hist_command(
+    common_options.HIST_HELP.format(
+        score_format=SCORE_FORMAT, command='bob measure hist'))
 def hist(ctx, scores, evaluation, **kwargs):
-    """ Plots histograms of positive and negatives along with threshold
-    criterion.
-
-    You need to provide one or more development score file(s) for each
-    experiment. You can also provide evaluation files along with dev files. If
-    evaluation scores are provided, you must use the `--eval` flag. The
-    threshold is always computed from development score files.
-
-    By default, when eval-scores are given, only eval-scores histograms are
-    displayed with threshold line computed from dev-scores.
-
-    Examples:
-
-        $ bob measure hist -v dev-scores
-
-        $ bob measure hist -e -v dev-scores1 eval-scores1 dev-scores2
-        eval-scores2
-
-        $ bob measure hist -e -v --criterion min-hter dev-scores1 eval-scores1
-    """
     process = figure.Hist(ctx, scores, evaluation, load.split)
     process.run()
 
 
-@click.command()
-@common_options.scores_argument(nargs=-1)
-@common_options.legends_option()
-@common_options.sep_dev_eval_option()
-@common_options.table_option()
-@common_options.eval_option()
-@common_options.criterion_option()
-@common_options.far_option()
-@common_options.output_log_metric_option()
-@common_options.output_plot_file_option(default_out='eval_plots.pdf')
-@common_options.lines_at_option()
-@common_options.points_curve_option()
-@common_options.const_layout_option()
-@common_options.figsize_option(dflt=None)
-@common_options.style_option()
-@common_options.linestyles_option()
-@verbosity_option()
-@click.pass_context
+@common_options.evaluate_command(
+    common_options.EVALUATE_HELP.format(
+        score_format=SCORE_FORMAT, command='bob measure evaluate'),
+    criteria=CRITERIA)
 def evaluate(ctx, scores, evaluation, **kwargs):
-    '''Runs error analysis on score sets
-
-    \b
-    1. Computes the threshold using a criteria (EER by default) on
-       development set scores
-    2. Applies the above threshold on evaluation set scores to compute the
-       HTER if a eval-score (use --eval) set is provided.
-    3. Reports error rates on the console or in a log file.
-    4. Plots ROC, EPC, DET curves and score distributions to a multi-page PDF
-       file
-
-    You need to provide 1 or 2 score files for each biometric system in this
-    order:
-
-    \b
-    * development scores
-    * evaluation scores
-
-    Examples:
-
-        $ bob measure evaluate -v dev-scores
-
-        $ bob measure evaluate -e -v scores-dev1 scores-eval1 scores-dev2
-        scores-eval2
-
-        $ bob measure evaluate -e -v /path/to/sys-{1,2,3}/scores-{dev,eval}
-
-        $ bob measure evaluate -v -l metrics.txt -o my_plots.pdf dev-scores
-    '''
-    # open_mode is always write in this command.
-    ctx.meta['open_mode'] = 'w'
-    criterion = ctx.meta.get('criterion')
-    if criterion is not None:
-        click.echo("Computing metrics with %s..." % criterion)
-        ctx.invoke(metrics, scores=scores, evaluation=evaluation)
-
-    if 'log' in ctx.meta:
-        click.echo("[metrics] => %s" % ctx.meta['log'])
-
-    # avoid closing pdf file before all figures are plotted
-    ctx.meta['closef'] = False
-    if evaluation:
-        click.echo("Starting evaluate with dev and eval scores...")
-    else:
-        click.echo("Starting evaluate with dev scores only...")
-    click.echo("Computing ROC...")
-    # set axes limits for ROC
-    ctx.forward(roc)  # use class defaults plot settings
-    click.echo("Computing DET...")
-    ctx.forward(det)  # use class defaults plot settings
-    if evaluation:
-        click.echo("Computing EPC...")
-        ctx.forward(epc)  # use class defaults plot settings
-    # the last one closes the file
-    ctx.meta['closef'] = True
-    click.echo("Computing score histograms...")
-    ctx.meta['criterion'] = 'eer'  # no criterion passed in evaluate
-    ctx.forward(hist)
-    click.echo("Evaluate successfully completed!")
-    click.echo("[plots] => %s" % (ctx.meta['output']))
+    common_options.evaluate_flow(
+        ctx, scores, evaluation, metrics, roc, det, epc, hist, **kwargs)
diff --git a/bob/measure/script/common_options.py b/bob/measure/script/common_options.py
index 904b940f08faedddcc4817c5a389a5a536c46631..f5204a922ba0257dc253906b50e81b9f62d9dc26 100644
--- a/bob/measure/script/common_options.py
+++ b/bob/measure/script/common_options.py
@@ -1,12 +1,14 @@
 '''Stores click common options for plots'''
+import functools
 import logging
 import click
 from click.types import INT, FLOAT
 import matplotlib.pyplot as plt
 import tabulate
 from matplotlib.backends.backend_pdf import PdfPages
-from bob.extension.scripts.click_helper import (bool_option, list_float_option)
+from bob.extension.scripts.click_helper import (
+    bool_option, list_float_option, verbosity_option, open_file_mode_option)
 
 LOGGER = logging.getLogger(__name__)
@@ -567,3 +569,353 @@ def style_option(**kwargs):
                  'multiple styles by repeating this option',
                  callback=callback, **kwargs)(func)
     return custom_style_option
+
+
+def metrics_command(docstring, criteria=('eer', 'min-hter', 'far')):
+    def custom_metrics_command(func):
+        func.__doc__ = docstring
+
+        @click.command()
+        @scores_argument(nargs=-1)
+        @eval_option()
+        @table_option()
+        @output_log_metric_option()
+        @criterion_option(criteria)
+        @thresholds_option()
+        @far_option()
+        @legends_option()
+        @open_file_mode_option()
+        @verbosity_option()
+        @click.pass_context
+        @functools.wraps(func)
+        def wrapper(*args, **kwds):
+            return func(*args, **kwds)
+        return wrapper
+    return custom_metrics_command
+
+
+METRICS_HELP = """Prints a table that contains {names} for a given
+    threshold criterion ({criteria}).
+
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide evaluation files along with dev files. If
+    evaluation scores are provided, you must use flag `--eval`.
+
+    {score_format}
+
+    The resulting table format can be changed using the `--tablefmt` option.
+
+    Examples:
+
+        $ {command} -v scores-dev
+
+        $ {command} -e -l results.txt sys1/scores-{{dev,eval}}
+
+        $ {command} -e {{sys1,sys2}}/scores-{{dev,eval}}
+    """
+
+
+def roc_command(docstring):
+    def custom_roc_command(func):
+        func.__doc__ = docstring
+
+        @click.command()
+        @scores_argument(nargs=-1)
+        @titles_option()
+        @legends_option()
+        @no_legend_option()
+        @legend_loc_option(dflt=None)
+        @sep_dev_eval_option()
+        @output_plot_file_option(default_out='roc.pdf')
+        @eval_option()
+        @semilogx_option(True)
+        @lines_at_option()
+        @axes_val_option()
+        @min_far_option()
+        @x_rotation_option()
+        @x_label_option()
+        @y_label_option()
+        @points_curve_option()
+        @const_layout_option()
+        @figsize_option()
+        @style_option()
+        @linestyles_option()
+        @verbosity_option()
+        @click.pass_context
+        @functools.wraps(func)
+        def wrapper(*args, **kwds):
+            return func(*args, **kwds)
+        return wrapper
+    return custom_roc_command
+
+
+ROC_HELP = """Plot ROC (receiver operating characteristic) curve.
+    The plot will represent the false match rate on the horizontal axis and the
+    false non match rate on the vertical axis. The values for the axes will be
+    computed using :py:func:`bob.measure.roc`.
+
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide evaluation files along with dev files. If
+    evaluation scores are provided, you must use flag `--eval`.
+
+    {score_format}
+
+    Examples:
+
+        $ {command} -v scores-dev
+
+        $ {command} -e -v sys1/scores-{{dev,eval}}
+
+        $ {command} -e -v -o my_roc.pdf {{sys1,sys2}}/scores-{{dev,eval}}
+    """
+
+
+def det_command(docstring):
+    def custom_det_command(func):
+        func.__doc__ = docstring
+
+        @click.command()
+        @scores_argument(nargs=-1)
+        @output_plot_file_option(default_out='det.pdf')
+        @titles_option()
+        @legends_option()
+        @no_legend_option()
+        @legend_loc_option(dflt='upper-right')
+        @sep_dev_eval_option()
+        @eval_option()
+        @axes_val_option(dflt='0.01,95,0.01,95')
+        @min_far_option()
+        @x_rotation_option(dflt=45)
+        @x_label_option()
+        @y_label_option()
+        @points_curve_option()
+        @lines_at_option()
+        @const_layout_option()
+        @figsize_option()
+        @style_option()
+        @linestyles_option()
+        @verbosity_option()
+        @click.pass_context
+        @functools.wraps(func)
+        def wrapper(*args, **kwds):
+            return func(*args, **kwds)
+        return wrapper
+    return custom_det_command
+
+
+DET_HELP = """Plot DET (detection error trade-off) curve.
+    A modified ROC curve which plots error rates on both axes
+    (false positives on the x-axis and false negatives on the y-axis).
+
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide evaluation files along with dev files. If
+    evaluation scores are provided, you must use flag `--eval`.
+
+    {score_format}
+
+    Examples:
+
+        $ {command} -v scores-dev
+
+        $ {command} -e -v sys1/scores-{{dev,eval}}
+
+        $ {command} -e -v -o my_det.pdf {{sys1,sys2}}/scores-{{dev,eval}}
+    """
+
+
+def epc_command(docstring):
+    def custom_epc_command(func):
+        func.__doc__ = docstring
+
+        @click.command()
+        @scores_argument(min_arg=1, force_eval=True, nargs=-1)
+        @output_plot_file_option(default_out='epc.pdf')
+        @titles_option()
+        @legends_option()
+        @no_legend_option()
+        @legend_loc_option(dflt='upper-center')
+        @points_curve_option()
+        @const_layout_option()
+        @x_label_option()
+        @y_label_option()
+        @figsize_option()
+        @style_option()
+        @linestyles_option()
+        @verbosity_option()
+        @click.pass_context
+        @functools.wraps(func)
+        def wrapper(*args, **kwds):
+            return func(*args, **kwds)
+        return wrapper
+    return custom_epc_command
+
+
+EPC_HELP = """Plot EPC (expected performance curve).
+    Plots the error rate on the eval set depending on a threshold selected
+    a-priori on the development set, and accounts for the varying relative
+    cost in [0; 1] of FPR and FNR when calculating the threshold.
+
+    You need to provide one or more development and evaluation score file(s)
+    for each experiment.
+
+    {score_format}
+
+    Examples:
+
+        $ {command} -v scores-{{dev,eval}}
+
+        $ {command} -v -o my_epc.pdf {{sys1,sys2}}/scores-{{dev,eval}}
+    """
+
+
+def hist_command(docstring):
+    def custom_hist_command(func):
+        func.__doc__ = docstring
+
+        @click.command()
+        @scores_argument(nargs=-1)
+        @output_plot_file_option(default_out='hist.pdf')
+        @eval_option()
+        @hide_dev_option()
+        @n_bins_option()
+        @legends_option()
+        @no_legend_option()
+        @legend_ncols_option()
+        @criterion_option()
+        @far_option()
+        @no_line_option()
+        @thresholds_option()
+        @subplot_option()
+        @const_layout_option()
+        @print_filenames_option()
+        @figsize_option(dflt=None)
+        @style_option()
+        @verbosity_option()
+        @click.pass_context
+        @functools.wraps(func)
+        def wrapper(*args, **kwds):
+            return func(*args, **kwds)
+        return wrapper
+    return custom_hist_command
+
+
+HIST_HELP = """Plots histograms of positives and negatives along with the
+    threshold criterion.
+
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide evaluation files along with dev files. If
+    evaluation scores are provided, you must use the `--eval` flag. The
+    threshold is always computed from development score files.
+
+    By default, when eval-scores are given, only eval-scores histograms are
+    displayed with the threshold line computed from dev-scores.
+
+    {score_format}
+
+    Examples:
+
+        $ {command} -v scores-dev
+
+        $ {command} -e -v sys1/scores-{{dev,eval}}
+
+        $ {command} -e -v --criterion min-hter {{sys1,sys2}}/scores-{{dev,eval}}
+    """
+
+
+def evaluate_command(docstring, criteria=('eer', 'min-hter', 'far')):
+    def custom_evaluate_command(func):
+        func.__doc__ = docstring
+
+        @click.command()
+        @scores_argument(nargs=-1)
+        @legends_option()
+        @sep_dev_eval_option()
+        @table_option()
+        @eval_option()
+        @criterion_option(criteria)
+        @far_option()
+        @output_log_metric_option()
+        @output_plot_file_option(default_out='eval_plots.pdf')
+        @lines_at_option()
+        @points_curve_option()
+        @const_layout_option()
+        @figsize_option(dflt=None)
+        @style_option()
+        @linestyles_option()
+        @verbosity_option()
+        @click.pass_context
+        @functools.wraps(func)
+        def wrapper(*args, **kwds):
+            return func(*args, **kwds)
+        return wrapper
+    return custom_evaluate_command
+
+
+EVALUATE_HELP = '''Runs error analysis on score sets.
+
+    \b
+    1. Computes the threshold using a criterion (EER by default) on
+       development set scores
+    2. Applies the above threshold on evaluation set scores to compute the
+       HTER, if an eval-score set is provided (use --eval).
+    3. Reports error rates on the console or in a log file.
+    4. Plots ROC, DET, and EPC curves and score distributions to a multi-page
+       PDF file
+
+    You need to provide 1 or 2 score files for each biometric system in this
+    order:
+
+    \b
+    * development scores
+    * evaluation scores
+
+    {score_format}
+
+    Examples:
+
+        $ {command} -v dev-scores
+
+        $ {command} -v /path/to/sys-{{1,2,3}}/scores-dev
+
+        $ {command} -e -v /path/to/sys-{{1,2,3}}/scores-{{dev,eval}}
+
+        $ {command} -v -l metrics.txt -o my_plots.pdf dev-scores
+
+    This command is a combination of the metrics, roc, det, epc, and hist
+    commands. If you want more flexibility in your plots, please use the
+    individual commands.
+    '''
+
+
+def evaluate_flow(ctx, scores, evaluation, metrics, roc, det, epc, hist,
+                  **kwargs):
+    # open_mode is always write in this command.
+    ctx.meta['open_mode'] = 'w'
+    criterion = ctx.meta.get('criterion')
+    if criterion is not None:
+        click.echo("Computing metrics with %s..." % criterion)
+        ctx.invoke(metrics, scores=scores, evaluation=evaluation)
+    if 'log' in ctx.meta:
+        click.echo("[metrics] => %s" % ctx.meta['log'])
+
+    # avoid closing pdf file before all figures are plotted
+    ctx.meta['closef'] = False
+    if evaluation:
+        click.echo("Starting evaluate with dev and eval scores...")
+    else:
+        click.echo("Starting evaluate with dev scores only...")
+    click.echo("Computing ROC...")
+    # set axes limits for ROC
+    ctx.forward(roc)  # use class defaults plot settings
+    click.echo("Computing DET...")
+    ctx.forward(det)  # use class defaults plot settings
+    if evaluation:
+        click.echo("Computing EPC...")
+        ctx.forward(epc)  # use class defaults plot settings
+    # the last one closes the file
+    ctx.meta['closef'] = True
+    click.echo("Computing score histograms...")
+    ctx.meta['criterion'] = 'eer'  # no criterion passed in evaluate
+    ctx.forward(hist)
+    click.echo("Evaluate successfully completed!")
+    click.echo("[plots] => %s" % (ctx.meta['output']))
diff --git a/bob/measure/script/figure.py b/bob/measure/script/figure.py
index 7df1fa4c096b5e8b5f437e55cdca0bc97159c650..1052405ac64a11eef1194d5e71a605ab1c44228e 100644
--- a/bob/measure/script/figure.py
+++ b/bob/measure/script/figure.py
@@ -160,8 +160,10 @@ class Metrics(MeasureBase):
         output stream
     '''
 
-    def __init__(self, ctx, scores, evaluation, func_load):
+    def __init__(self, ctx, scores, evaluation, func_load,
+                 names=('FtA', 'FMR', 'FNMR', 'FAR', 'FRR', 'HTER')):
         super(Metrics, self).__init__(ctx, scores, evaluation, func_load)
+        self.names = names
         self._tablefmt = ctx.meta.get('tablefmt')
         self._criterion = ctx.meta.get('criterion')
         self._open_mode = ctx.meta.get('open_mode')
@@ -225,12 +227,12 @@ class Metrics(MeasureBase):
         dev_frr_str = "%.1f%%" % (100 * dev_frr)
         dev_hter_str = "%.1f%%" % (100 * dev_hter)
         headers = ['' or title, 'Development %s' % dev_file]
-        raws = [['FtA', dev_fta_str],
-                ['FMR', dev_fmr_str],
-                ['FNMR', dev_fnmr_str],
-                ['FAR', dev_far_str],
-                ['FRR', dev_frr_str],
-                ['HTER', dev_hter_str]]
+        raws = [[self.names[0], dev_fta_str],
+                [self.names[1], dev_fmr_str],
+                [self.names[2], dev_fnmr_str],
+                [self.names[3], dev_far_str],
+                [self.names[4], dev_frr_str],
+                [self.names[5], dev_hter_str]]
 
         if self._eval:
             # computes statistics for the eval set based on the threshold a priori
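
Note on the decorator factories above: Click takes a command's help text from
the callback's docstring, so each factory first assigns the formatted help
string to ``func.__doc__`` and then relies on ``functools.wraps`` to copy that
docstring onto the wrapper that ``@click.command()`` actually decorates. A
minimal, self-contained sketch of the same pattern (toy names, not part of
this patch):

    import functools

    import click


    def command_factory(docstring):
        """Return a decorator that builds a click command with a shared
        option stack and ``docstring`` as its help text."""
        def decorator(func):
            func.__doc__ = docstring  # picked up by click via wraps below

            @click.command()
            @click.option('-v', '--verbose', count=True)  # stand-in option stack
            @functools.wraps(func)  # copies __name__ and __doc__ onto wrapper
            def wrapper(*args, **kwds):
                return func(*args, **kwds)
            return wrapper
        return decorator


    @command_factory("Help text injected by the factory.")
    def hello(verbose):
        click.echo("verbosity: %d" % verbose)

Invoking the resulting command with ``--help`` prints the injected docstring,
while each concrete command keeps its own callback body.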
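
The practical payoff is that downstream packages can declare the same commands
with customized help text and table row labels instead of copy-pasting the
whole option stack. A hypothetical sketch of such reuse (the command name,
criteria, score-format note, and the APCER/BPCER/ACER labels below are
illustrative assumptions, not part of this patch):

    from bob.measure import load
    from bob.measure.script import common_options, figure

    MY_CRITERIA = ('eer', 'min-hter')
    MY_SCORE_FORMAT = "Scores are 2-column files (see bob.measure.load.split)."


    @common_options.metrics_command(
        common_options.METRICS_HELP.format(
            names='FtA, APCER, BPCER, ACER',
            criteria=MY_CRITERIA,
            score_format=MY_SCORE_FORMAT,
            command='my-package metrics'),
        criteria=MY_CRITERIA)
    def my_metrics(ctx, scores, evaluation, **kwargs):
        # ``names`` relabels the six table rows (FtA, FMR, FNMR, FAR, FRR,
        # HTER by default) without touching the computation itself.
        process = figure.Metrics(
            ctx, scores, evaluation, load.split,
            names=('FtA', 'APCER', 'BPCER', 'FAR', 'FRR', 'ACER'))
        process.run()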