diff --git a/bob/bio/base/script/commands.py b/bob/bio/base/script/commands.py index 5eb7554b04e6531f5ac2b5030ac599cf728e938b..ae944eb23b7d3d8f598309435b6e429a9cf358d2 100644 --- a/bob/bio/base/script/commands.py +++ b/bob/bio/base/script/commands.py @@ -22,6 +22,7 @@ def rank_option(**kwargs): callback=callback, show_default=True, **kwargs)(func) return custom_rank_option + @click.command() @common_options.scores_argument(nargs=-1) @common_options.table_option() @@ -39,10 +40,10 @@ def metrics(ctx, scores, evaluation, **kargs): """Prints a single output line that contains all info for a given criterion (eer, min-hter, far, mindcf, cllr, rr). - You need to provide one or more development score file(s) for each experiment. - You can also provide eval files along with dev files. If only dev-scores - are used, the flag `--no-evaluation` must be used. - is required in that case. Files must be 4- or 5- columns format, see + You need to provide one or more development score file(s) for each + experiment. You can also provide eval files along with dev files. If only + dev-scores are used, the flag `--no-evaluation` must be used. is required + in that case. Files must be 4- or 5- columns format, see :py:func:`bob.bio.base.score.load.four_column` and :py:func:`bob.bio.base.score.load.five_column` for details. @@ -65,6 +66,7 @@ def metrics(ctx, scores, evaluation, **kargs): process = bio_figure.Metrics(ctx, scores, evaluation, load.split) process.run() + @click.command() @common_options.scores_argument(nargs=-1) @common_options.title_option() @@ -92,24 +94,25 @@ def roc(ctx, scores, evaluation, **kargs): false non match rate on the vertical axis. The values for the axis will be computed using :py:func:`bob.measure.roc`. - You need to provide one or more development score file(s) for each experiment. - You can also provide eval files along with dev files. If only dev-scores - are used, the flag `--no-evaluation` must be used. - is required in that case. Files must be 4- or 5- columns format, see + You need to provide one or more development score file(s) for each + experiment. You can also provide eval files along with dev files. If only + dev-scores are used, the flag `--no-evaluation` must be used. is required + in that case. Files must be 4- or 5- columns format, see :py:func:`bob.bio.base.score.load.four_column` and :py:func:`bob.bio.base.score.load.five_column` for details. Examples: - $ bob bio roc dev-scores + $ bob bio roc -v dev-scores - $ bob bio roc dev-scores1 eval-scores1 dev-scores2 + $ bob bio roc -v dev-scores1 eval-scores1 dev-scores2 eval-scores2 - $ bob bio roc -o my_roc.pdf dev-scores1 eval-scores1 + $ bob bio roc -v -o my_roc.pdf dev-scores1 eval-scores1 """ process = bio_figure.Roc(ctx, scores, evaluation, load.split) process.run() + @click.command() @common_options.scores_argument(nargs=-1) @common_options.title_option() @@ -135,24 +138,25 @@ def det(ctx, scores, evaluation, **kargs): modified ROC curve which plots error rates on both axes (false positives on the x-axis and false negatives on the y-axis) - You need to provide one or more development score file(s) for each experiment. - You can also provide eval files along with dev files. If only dev-scores - are used, the flag `--no-evaluation` must be used. - is required in that case. Files must be 4- or 5- columns format, see + You need to provide one or more development score file(s) for each + experiment. You can also provide eval files along with dev files. 
If only + dev-scores are used, the flag `--no-evaluation` must be used. is required + in that case. Files must be 4- or 5- columns format, see :py:func:`bob.bio.base.score.load.four_column` and :py:func:`bob.bio.base.score.load.five_column` for details. Examples: - $ bob bio det dev-scores + $ bob bio det -v dev-scores - $ bob bio det dev-scores1 eval-scores1 dev-scores2 + $ bob bio det -v dev-scores1 eval-scores1 dev-scores2 eval-scores2 - $ bob bio det -o my_det.pdf dev-scores1 eval-scores1 + $ bob bio det -v -o my_det.pdf dev-scores1 eval-scores1 """ process = bio_figure.Det(ctx, scores, evaluation, load.split) process.run() + @click.command() @common_options.scores_argument(min_arg=1, force_eval=True, nargs=-1) @common_options.title_option() @@ -177,13 +181,14 @@ def epc(ctx, scores, **kargs): :py:func:`bob.bio.base.score.load.five_column` for details. Examples: - $ bob bio epc dev-scores eval-scores + $ bob bio epc -v dev-scores eval-scores - $ bob bio epc -o my_epc.pdf dev-scores1 eval-scores1 + $ bob bio epc -v -o my_epc.pdf dev-scores1 eval-scores1 """ process = measure_figure.Epc(ctx, scores, True, load.split) process.run() + @click.command() @common_options.scores_argument(nargs=-1) @common_options.title_option() @@ -202,30 +207,31 @@ def epc(ctx, scores, **kargs): @click.pass_context def cmc(ctx, scores, evaluation, **kargs): """Plot CMC (cumulative match characteristic curve): - graphical presentation of results of an identification task eval, - plotting rank values on the x-axis and the probability of correct identification - at or below that rank on the y-axis. The values for the axis will be - computed using :py:func:`bob.measure.cmc`. - - You need to provide one or more development score file(s) for each experiment. - You can also provide eval files along with dev files. If only dev-scores - are used, the flag `--no-evaluation` must be used. - is required in that case. Files must be 4- or 5- columns format, see + graphical presentation of results of an identification task eval, plotting + rank values on the x-axis and the probability of correct identification at + or below that rank on the y-axis. The values for the axis will be computed + using :py:func:`bob.measure.cmc`. + + You need to provide one or more development score file(s) for each + experiment. You can also provide eval files along with dev files. If only + dev-scores are used, the flag `--no-evaluation` must be used. is required + in that case. Files must be 4- or 5- columns format, see :py:func:`bob.bio.base.score.load.four_column` and :py:func:`bob.bio.base.score.load.five_column` for details. Examples: - $ bob bio cmc dev-scores + $ bob bio cmc -v dev-scores - $ bob bio cmc dev-scores1 eval-scores1 dev-scores2 + $ bob bio cmc -v dev-scores1 eval-scores1 dev-scores2 eval-scores2 - $ bob bio cmc -o my_roc.pdf dev-scores1 eval-scores1 + $ bob bio cmc -v -o my_roc.pdf dev-scores1 eval-scores1 """ process = bio_figure.Cmc(ctx, scores, evaluation, load.cmc) process.run() + @click.command() @common_options.scores_argument(nargs=-1) @common_options.title_option() @@ -246,37 +252,38 @@ def cmc(ctx, scores, evaluation, **kargs): def dir(ctx, scores, evaluation, **kargs): """Plots the Detection & Identification Rate curve over the FAR - This curve is designed to be used in an open set identification protocol, and - defined in Chapter 14.1 of [LiJain2005]_. 
It requires to have at least one - open set probe item, i.e., with no corresponding gallery, such that the + This curve is designed to be used in an open set identification protocol, + and defined in Chapter 14.1 of [LiJain2005]_. It requires to have at least + one open set probe item, i.e., with no corresponding gallery, such that the positives for that pair are ``None``. - The detection and identification curve first computes FAR thresholds based on - the out-of-set probe scores (negative scores). For each probe item, the + The detection and identification curve first computes FAR thresholds based + on the out-of-set probe scores (negative scores). For each probe item, the **maximum** negative score is used. Then, it plots the detection and identification rates for those thresholds, which are based on the in-set probe scores only. See [LiJain2005]_ for more details. .. [LiJain2005] **Stan Li and Anil K. Jain**, *Handbook of Face Recognition*, Springer, 2005 - You need to provide one or more development score file(s) for each experiment. - You can also provide eval files along with dev files. If only dev-scores - are used, the flag `--no-evaluation` must be used. - is required in that case. Files must be 4- or 5- columns format, see + You need to provide one or more development score file(s) for each + experiment. You can also provide eval files along with dev files. If only + dev-scores are used, the flag `--no-evaluation` must be used. is required + in that case. Files must be 4- or 5- columns format, see :py:func:`bob.bio.base.score.load.four_column` and :py:func:`bob.bio.base.score.load.five_column` for details. Examples: - $ bob bio dir dev-scores + $ bob bio dir -v dev-scores - $ bob bio dir dev-scores1 eval-scores1 dev-scores2 + $ bob bio dir -v dev-scores1 eval-scores1 dev-scores2 eval-scores2 - $ bob bio dir -o my_roc.pdf dev-scores1 eval-scores1 + $ bob bio dir -v -o my_roc.pdf dev-scores1 eval-scores1 """ process = bio_figure.Dir(ctx, scores, evaluation, load.cmc) process.run() + @click.command() @common_options.scores_argument(nargs=-1) @common_options.title_option() @@ -298,10 +305,10 @@ def hist(ctx, scores, evaluation, **kwargs): """ Plots histograms of positive and negatives along with threshold criterion. - You need to provide one or more development score file(s) for each experiment. - You can also provide eval files along with dev files. If only dev-scores - are used, the flag `--no-evaluation` must be used. - is required in that case. Files must be 4- or 5- columns format, see + You need to provide one or more development score file(s) for each + experiment. You can also provide eval files along with dev files. If only + dev-scores are used, the flag `--no-evaluation` must be used. is required + in that case. Files must be 4- or 5- columns format, see :py:func:`bob.bio.base.score.load.four_column` and :py:func:`bob.bio.base.score.load.five_column` for details. @@ -311,16 +318,17 @@ def hist(ctx, scores, evaluation, **kwargs): as well, use ``--show-dev`` option. 
Examples: - $ bob bio hist dev-scores + $ bob bio hist -v dev-scores - $ bob bio hist dev-scores1 eval-scores1 dev-scores2 + $ bob bio hist -v dev-scores1 eval-scores1 dev-scores2 eval-scores2 - $ bob bio hist --criterion --show-dev min-hter dev-scores1 eval-scores1 + $ bob bio hist -v --criterion --show-dev min-hter dev-scores1 eval-scores1 """ process = bio_figure.Hist(ctx, scores, evaluation, load.split) process.run() + @click.command() @common_options.scores_argument(nargs=-1) @common_options.legends_option() @@ -352,10 +360,10 @@ def evaluate(ctx, scores, evaluation, **kwargs): 4. Plots ROC, EPC, DET, score distributions curves to a multi-page PDF file - You need to provide one or more development score file(s) for each experiment. - You can also provide eval files along with dev files. If only dev-scores - are used, the flag `--no-evaluation` must be used. - is required in that case. Files must be 4- or 5- columns format, see + You need to provide one or more development score file(s) for each + experiment. You can also provide eval files along with dev files. If only + dev-scores are used, the flag `--no-evaluation` must be used. is required + in that case. Files must be 4- or 5- columns format, see :py:func:`bob.bio.base.score.load.four_column` and :py:func:`bob.bio.base.score.load.five_column` for details. @@ -366,11 +374,11 @@ def evaluate(ctx, scores, evaluation, **kwargs): * evaluation scores Examples: - $ bob bio evaluate dev-scores + $ bob bio evaluate -v dev-scores - $ bob bio evaluate -l metrics.txt -o my_plots.pdf dev-scores eval-scores + $ bob bio evaluate -v -l metrics.txt -o my_plots.pdf dev-scores eval-scores - $ bob bio evaluate -o my_plots.pdf /path/to/syst-{1,2,3}/{dev,eval}-scores + $ bob bio evaluate -v -o my_plots.pdf /path/to/syst-{1,2,3}/{dev,eval}-scores ''' log_str = '' if 'log' in ctx.meta and ctx.meta['log'] is not None: @@ -387,8 +395,8 @@ def evaluate(ctx, scores, evaluation, **kwargs): ctx.meta['criterion'] = 'min-hter' # no criterion passed in evaluate ctx.invoke(metrics, scores=scores, evaluation=evaluation) if 'far_value' in ctx.meta and ctx.meta['far_value'] is not None: - click.echo("Computing metrics with FAR=%f%s..." %\ - (ctx.meta['far_value'], log_str)) + click.echo("Computing metrics with FAR=%f%s..." % + (ctx.meta['far_value'], log_str)) ctx.meta['criterion'] = 'far' # no criterio % n passed in evaluate ctx.invoke(metrics, scores=scores, evaluation=evaluation) @@ -409,14 +417,14 @@ def evaluate(ctx, scores, evaluation, **kwargs): click.echo("Starting evaluate with dev scores only...") click.echo("Generating ROC in %s..." % ctx.meta['output']) - ctx.forward(roc) # use class defaults plot settings + ctx.forward(roc) # use class defaults plot settings click.echo("Generating DET in %s..." % ctx.meta['output']) - ctx.forward(det) # use class defaults plot settings + ctx.forward(det) # use class defaults plot settings if evaluation: click.echo("Generating EPC in %s..." % ctx.meta['output']) - ctx.forward(epc) # use class defaults plot settings + ctx.forward(epc) # use class defaults plot settings # the last one closes the file ctx.meta['closef'] = True diff --git a/bob/bio/base/script/evaluate.py b/bob/bio/base/script/evaluate.py deleted file mode 100644 index 5ec3b2467b111886272565a0b53ead079b3937cc..0000000000000000000000000000000000000000 --- a/bob/bio/base/script/evaluate.py +++ /dev/null @@ -1,472 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : - -"""This script evaluates the given score files and computes EER, HTER. 
-It also is able to plot CMC and ROC curves. -You can set the environment variable BOB_NO_STYLE_CHANGES to any value to avoid -this script from changing the matplotlib style values. """ - -from __future__ import print_function - -# matplotlib stuff -import matplotlib -from matplotlib import pyplot -pyplot.switch_backend('pdf') # switch to non-X backend -from matplotlib.backends.backend_pdf import PdfPages - -# import bob.measure after matplotlib, so that it cannot define the backend - -import argparse -import numpy -import math -import os - -import bob.measure -from .. import score - - - -if not os.environ.get('BOB_NO_STYLE_CHANGES'): - # make the fig size smaller so that everything becomes bigger - matplotlib.rc('figure', figsize=(4, 3)) - - -import bob.core -logger = bob.core.log.setup("bob.bio.base") - - -def command_line_arguments(command_line_parameters): - """Parse the program options""" - - # set up command line parser - parser = argparse.ArgumentParser(description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('-d', '--dev-files', required=True, nargs='+', help = "A list of score files of the development set.") - parser.add_argument('-e', '--eval-files', nargs='+', help = "A list of score files of the evaluation set; if given it must be the same number of files as the --dev-files.") - - parser.add_argument('-s', '--directory', default = '.', help = "A directory, where to find the --dev-files and the --eval-files") - - parser.add_argument('-c', '--criterion', choices = ('EER', 'HTER', 'FAR'), help = "If given, the threshold of the development set will be computed with this criterion.") - parser.add_argument('-f', '--far-value', type=float, default=0.001, help = "The FAR value for which to evaluate (only for --criterion FAR)") - parser.add_argument('-x', '--cllr', action = 'store_true', help = "If given, Cllr and minCllr will be computed.") - parser.add_argument('-m', '--mindcf', action = 'store_true', help = "If given, minDCF will be computed.") - parser.add_argument('--cost', default=0.99, help='Cost for FAR in minDCF') - parser.add_argument('-r', '--rr', action = 'store_true', help = "If given, the Recognition Rate will be computed.") - parser.add_argument('-o', '--rank', type=int, default=1, help = "The rank for which to plot the DIR curve") - parser.add_argument('-t', '--thresholds', type=float, nargs='+', help = "If given, the Recognition Rate will incorporate an Open Set handling, rejecting all scores that are below the given threshold; when multiple thresholds are given, they are applied in the same order as the --dev-files.") - parser.add_argument('-l', '--legends', nargs='+', help = "A list of legend strings used for ROC, CMC and DET plots; if given, must be the same number than --dev-files.") - parser.add_argument('-F', '--legend-font-size', type=int, default=10, help = "Set the font size of the legends.") - parser.add_argument('-P', '--legend-position', type=int, help = "Set the font size of the legends.") - parser.add_argument('-T', '--title', nargs = '+', help = "Overwrite the default title of the plot for development (and evaluation) set") - parser.add_argument('-R', '--roc', help = "If given, ROC curves will be plotted into the given pdf file.") - parser.add_argument('-D', '--det', help = "If given, DET curves will be plotted into the given pdf file.") - parser.add_argument('-C', '--cmc', help = "If given, CMC curves will be plotted into the given pdf file.") - parser.add_argument('-O', '--dir', help = "If given, DIR curves 
will be plotted into the given pdf file; This is an open-set measure, which cannot be applied to closed set score files.") - parser.add_argument('-E', '--epc', help = "If given, EPC curves will be plotted into the given pdf file. For this plot --eval-files is mandatory.") - parser.add_argument('-M', '--min-far-value', type=float, default=1e-4, help = "Select the minimum FAR value used in ROC plots; should be a power of 10.") - parser.add_argument('-L', '--far-line-at', type=float, help = "If given, draw a veritcal line at this FAR value in the ROC plots.") - - # add verbose option - bob.core.log.add_command_line_option(parser) - - # parse arguments - args = parser.parse_args(command_line_parameters) - - # set verbosity level - bob.core.log.set_verbosity_level(logger, args.verbose) - - # some sanity checks: - for f in args.dev_files + (args.eval_files or []): - real_file = os.path.join(args.directory, f) - if not os.path.exists(real_file): - raise ValueError("The provided score file '%s' does not exist" % real_file) - - if args.eval_files is not None and len(args.dev_files) != len(args.eval_files): - logger.error("The number of --dev-files (%d) and --eval-files (%d) are not identical", len(args.dev_files), len(args.eval_files)) - - # update legends when they are not specified on command line - if args.legends is None: - args.legends = [f.replace('_', '-') for f in args.dev_files] - logger.warn("Legends are not specified; using legends estimated from --dev-files: %s", args.legends) - - # check that the legends have the same length as the dev-files - if len(args.dev_files) != len(args.legends): - logger.error("The number of --dev-files (%d) and --legends (%d) are not identical", len(args.dev_files), len(args.legends)) - - if args.thresholds is not None: - if len(args.thresholds) == 1: - args.thresholds = args.thresholds * len(args.dev_files) - elif len(args.thresholds) != len(args.dev_files): - logger.error("If given, the number of --thresholds imust be either 1, or the same as --dev-files (%d), but it is %d", len(args.dev_files), len(args.thresholds)) - else: - args.thresholds = [None] * len(args.dev_files) - - if args.title is not None: - if args.eval_files is None and len(args.title) != 1: - logger.warning("Ignoring the title for the evaluation set, as no evaluation set is given") - if args.eval_files is not None and len(args.title) < 2: - logger.error("The title for the evaluation set is not specified") - - return args - -def _add_far_labels(min_far): - # compute and apply tick marks - assert min_far > 0 - ticks = [min_far] - while ticks[-1] < 1.: ticks.append(ticks[-1] * 10.) - pyplot.xticks(ticks) - pyplot.axis([min_far, 1., -0.01, 1.01]) - - - -def _plot_roc(frrs, colors, labels, title, fontsize=10, position=None, farfrrs=None, min_far=None): - if position is None: position = 'lower right' - figure = pyplot.figure() - - # plot FAR and CAR for each algorithm - for i in range(len(frrs)): - pyplot.semilogx([f for f in frrs[i][0]], [1. 
- f for f in frrs[i][1]], color=colors[i], label=labels[i]) - if isinstance(farfrrs, list): - pyplot.plot(farfrrs[i][0], (1.-farfrrs[i][1]), 'o', color=colors[i], markeredgecolor=colors[i]) - - # plot vertical bar, if desired - if farfrrs is not None: - if isinstance(farfrrs, float): - pyplot.plot([farfrrs,farfrrs],[0.,1.], "--", color='black') - else: - pyplot.plot([x[0] for x in farfrrs], [(1.-x[1]) for x in farfrrs], '--', color='black') - - _add_far_labels(min_far) - - # set label, legend and title - pyplot.xlabel('FMR') - pyplot.ylabel('1 - FNMR') - pyplot.grid(True, color=(0.6,0.6,0.6)) - pyplot.legend(loc=position, prop = {'size':fontsize}) - pyplot.title(title) - - return figure - - -def _plot_det(dets, colors, labels, title, fontsize=10, position=None): - if position is None: position = 'upper right' - # open new page for current plot - figure = pyplot.figure(figsize=(matplotlib.rcParams['figure.figsize'][0], - matplotlib.rcParams['figure.figsize'][0] * 0.975)) - pyplot.grid(True) - - # plot the DET curves - for i in range(len(dets)): - pyplot.plot(dets[i][0], dets[i][1], color=colors[i], label=labels[i]) - - # change axes accordingly - det_list = [0.0002, 0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 0.7, 0.9, 0.95] - ticks = [bob.measure.ppndf(d) for d in det_list] - labels = [("%.5f" % d).rstrip('0').rstrip('.') for d in det_list] - pyplot.xticks(ticks, [l if i % 2 else "" for i,l in enumerate(labels)]) - pyplot.yticks(ticks, labels) - pyplot.axis((ticks[0], ticks[-1], ticks[0], ticks[-1])) - - pyplot.xlabel('FMR') - pyplot.ylabel('FNMR') - pyplot.legend(loc=position, prop = {'size':fontsize}) - pyplot.title(title) - - return figure - - -def _plot_cmc(cmcs, colors, labels, title, fontsize=10, position=None): - if position is None: position = 'lower right' - # open new page for current plot - figure = pyplot.figure() - - max_R = 0 - # plot the CMC curves - for i in range(len(cmcs)): - probs = bob.measure.cmc(cmcs[i]) - R = len(probs) - pyplot.semilogx(range(1, R+1), probs, figure=figure, color=colors[i], label=labels[i]) - max_R = max(R, max_R) - - # change axes accordingly - ticks = [int(t) for t in pyplot.xticks()[0]] - pyplot.xlabel('Rank') - pyplot.ylabel('Probability') - pyplot.xticks(ticks, [str(t) for t in ticks]) - pyplot.axis([0, max_R, -0.01, 1.01]) - pyplot.legend(loc=position, prop = {'size':fontsize}) - pyplot.title(title) - - return figure - - -def _plot_dir(cmc_scores, far_values, rank, colors, labels, title, fontsize=10, position=None): - if position is None: position = 'lower right' - # open new page for current plot - figure = pyplot.figure() - - # for each probe, for which no positives exists, get the highest negative - # score; and sort them to compute the FAR thresholds - for i, cmcs in enumerate(cmc_scores): - negatives = sorted(max(neg) for neg, pos in cmcs if (pos is None or not numpy.array(pos).size) and neg is not None) - if not negatives: - raise ValueError("There need to be at least one pair with only negative scores") - - # compute thresholds based on FAR values - thresholds = [bob.measure.far_threshold(negatives, [], v, True) for v in far_values] - - # compute detection and identification rate based on the thresholds for - # the given rank - rates = [bob.measure.detection_identification_rate(cmcs, t, rank) for t in thresholds] - - # plot DIR curve - pyplot.semilogx(far_values, rates, figure=figure, color=colors[i], label=labels[i]) - - # finalize plot - _add_far_labels(far_values[0]) - - pyplot.xlabel('FAR') - pyplot.ylabel('DIR') - 
pyplot.legend(loc=position, prop = {'size':fontsize}) - pyplot.title(title) - - return figure - - -def _plot_epc(scores_dev, scores_eval, colors, labels, title, fontsize=10, position=None): - if position is None: position = 'upper center' - # open new page for current plot - figure = pyplot.figure() - - # plot the DET curves - for i in range(len(scores_dev)): - x,y = bob.measure.epc(scores_dev[i][0], scores_dev[i][1], scores_eval[i][0], scores_eval[i][1], 100) - pyplot.plot(x, y, color=colors[i], label=labels[i]) - - # change axes accordingly - pyplot.xlabel('alpha') - pyplot.ylabel('HTER') - pyplot.title(title) - pyplot.axis([-0.01, 1.01, -0.01, 0.51]) - pyplot.grid(True) - pyplot.legend(loc=position, prop = {'size':fontsize}) - pyplot.title(title) - - return figure - - -def remove_nan(scores): - """removes the NaNs from the scores""" - nans = numpy.isnan(scores) - sum_nans = sum(nans) - total = len(scores) - return scores[numpy.where(~nans)], sum_nans, total - - -def get_fta(scores): - """calculates the Failure To Acquire (FtA) rate""" - fta_sum, fta_total = 0, 0 - neg, sum_nans, total = remove_nan(scores[0]) - fta_sum += sum_nans - fta_total += total - pos, sum_nans, total = remove_nan(scores[1]) - fta_sum += sum_nans - fta_total += total - return (neg, pos, fta_sum * 100 / float(fta_total)) - - -def main(command_line_parameters=None): - """Reads score files, computes error measures and plots curves.""" - - args = command_line_arguments(command_line_parameters) - - # get some colors for plotting - if len(args.dev_files) > 10: - cmap = pyplot.cm.get_cmap(name='magma') - colors = [cmap(i) for i in numpy.linspace(0, 1.0, len(args.dev_files) + 1)] - else: - # matplotlib 2.0 default color cycler list: Vega category10 palette - colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', - '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', - '#bcbd22', '#17becf'] - - if args.criterion or args.roc or args.det or args.epc or args.cllr or args.mindcf: - - # First, read the score files - logger.info("Loading %d score files of the development set", len(args.dev_files)) - scores_dev = [score.split(os.path.join(args.directory, f)) for f in args.dev_files] - # remove nans - scores_dev = [get_fta(s) for s in scores_dev] - - if args.eval_files: - logger.info("Loading %d score files of the evaluation set", len(args.eval_files)) - scores_eval = [score.split(os.path.join(args.directory, f)) for f in args.eval_files] - # remove nans - scores_eval = [get_fta(s) for s in scores_eval] - - - if args.criterion: - logger.info("Computing %s on the development " % args.criterion + ("and HTER on the evaluation set" if args.eval_files else "set")) - for i in range(len(scores_dev)): - # compute threshold on development set - if args.criterion == 'FAR': - threshold = bob.measure.far_threshold(scores_dev[i][0], scores_dev[i][1], args.far_value/100.) 
- else: - threshold = {'EER': bob.measure.eer_threshold, 'HTER' : bob.measure.min_hter_threshold} [args.criterion](scores_dev[i][0], scores_dev[i][1]) - # apply threshold to development set - far, frr = bob.measure.farfrr(scores_dev[i][0], scores_dev[i][1], threshold) - if args.criterion == 'FAR': - print("The FRR at FAR=%.1E of the development set of '%s' is %2.3f%% (CAR: %2.3f%%)" % (args.far_value, args.legends[i], frr * 100., 100.*(1-frr))) - else: - print("The %s of the development set of '%s' is %2.3f%%" % (args.criterion, args.legends[i], (far + frr) * 50.)) # / 2 * 100% - if args.eval_files: - # apply threshold to evaluation set - far, frr = bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold) - if args.criterion == 'FAR': - print("The FRR of the evaluation set of '%s' is %2.3f%% (CAR: %2.3f%%)" % (args.legends[i], frr * 100., 100.*(1-frr))) # / 2 * 100% - else: - print("The HTER of the evaluation set of '%s' is %2.3f%%" % (args.legends[i], (far + frr) * 50.)) # / 2 * 100% - - - if args.mindcf: - logger.info("Computing minDCF on the development " + ("and on the evaluation set" if args.eval_files else "set")) - for i in range(len(scores_dev)): - # compute threshold on development set - threshold = bob.measure.min_weighted_error_rate_threshold(scores_dev[i][0], scores_dev[i][1], args.cost) - # apply threshold to development set - far, frr = bob.measure.farfrr(scores_dev[i][0], scores_dev[i][1], threshold) - print("The minDCF of the development set of '%s' is %2.3f%%" % (args.legends[i], (args.cost * far + (1-args.cost) * frr) * 100. )) - if args.eval_files: - # compute threshold on evaluation set - threshold = bob.measure.min_weighted_error_rate_threshold(scores_eval[i][0], scores_eval[i][1], args.cost) - # apply threshold to evaluation set - far, frr = bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold) - print("The minDCF of the evaluation set of '%s' is %2.3f%%" % (args.legends[i], (args.cost * far + (1-args.cost) * frr) * 100. )) - - - if args.cllr: - logger.info("Computing Cllr and minCllr on the development " + ("and on the evaluation set" if args.eval_files else "set")) - for i in range(len(scores_dev)): - cllr = bob.measure.calibration.cllr(scores_dev[i][0], scores_dev[i][1]) - min_cllr = bob.measure.calibration.min_cllr(scores_dev[i][0], scores_dev[i][1]) - print("Calibration performance on development set of '%s' is Cllr %1.5f and minCllr %1.5f " % (args.legends[i], cllr, min_cllr)) - if args.eval_files: - cllr = bob.measure.calibration.cllr(scores_eval[i][0], scores_eval[i][1]) - min_cllr = bob.measure.calibration.min_cllr(scores_eval[i][0], scores_eval[i][1]) - print("Calibration performance on evaluation set of '%s' is Cllr %1.5f and minCllr %1.5f" % (args.legends[i], cllr, min_cllr)) - - - if args.roc: - logger.info("Computing CAR curves on the development " + ("and on the evaluation set" if args.eval_files else "set")) - min_far = int(math.floor(math.log(args.min_far_value, 10))) - fars = [math.pow(10., i * 0.25) for i in range(min_far * 4, 0)] + [1.] 
- frrs_dev = [bob.measure.roc_for_far(scores[0], scores[1], fars) for scores in scores_dev] - if args.eval_files: - frrs_eval = [bob.measure.roc_for_far(scores[0], scores[1], fars) for scores in scores_eval] - - logger.info("Plotting ROC curves to file '%s'", args.roc) - try: - # create a multi-page PDF for the ROC curve - pdf = PdfPages(args.roc) - # create a separate figure for dev and eval - pdf.savefig(_plot_roc( - frrs_dev, colors, args.legends, - args.title[0] if args.title is not None else "ROC for development set", - args.legend_font_size, args.legend_position, args.far_line_at, - min_far=args.min_far_value), bbox_inches='tight') - del frrs_dev - if args.eval_files: - if args.far_line_at is not None: - farfrrs = [] - for i in range(len(scores_dev)): - threshold = bob.measure.far_threshold(scores_dev[i][0], scores_dev[i][1], args.far_line_at) - farfrrs.append(bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold)) - else: - farfrrs = None - pdf.savefig(_plot_roc(frrs_eval, colors, args.legends, args.title[1] if args.title is not None else "ROC for evaluation set", args.legend_font_size, args.legend_position, farfrrs, min_far=args.min_far_value), bbox_inches='tight') - del frrs_eval - pdf.close() - except RuntimeError as e: - raise RuntimeError("During plotting of ROC curves, the following exception occured:\n%s" % e) - - if args.det: - logger.info("Computing DET curves on the development " + ("and on the evaluation set" if args.eval_files else "set")) - dets_dev = [bob.measure.det(scores[0], scores[1], 1000) for scores in scores_dev] - if args.eval_files: - dets_eval = [bob.measure.det(scores[0], scores[1], 1000) for scores in scores_eval] - - logger.info("Plotting DET curves to file '%s'", args.det) - try: - # create a multi-page PDF for the DET curve - pdf = PdfPages(args.det) - # create a separate figure for dev and eval - pdf.savefig(_plot_det(dets_dev, colors, args.legends, args.title[0] if args.title is not None else "DET for development set", args.legend_font_size, args.legend_position), bbox_inches='tight') - del dets_dev - if args.eval_files: - pdf.savefig(_plot_det(dets_eval, colors, args.legends, args.title[1] if args.title is not None else "DET for evaluation set", args.legend_font_size, args.legend_position), bbox_inches='tight') - del dets_eval - pdf.close() - except RuntimeError as e: - raise RuntimeError("During plotting of DET curves, the following exception occured:\n%s" % e) - - - if args.epc: - logger.info("Plotting EPC curves to file '%s'", args.epc) - - if not args.eval_files: - raise ValueError("To plot the EPC curve the evaluation scores are necessary. 
Please, set it with the --eval-files option.") - - try: - # create a multi-page PDF for the EPC curve - pdf = PdfPages(args.epc) - pdf.savefig(_plot_epc(scores_dev, scores_eval, colors, args.legends, args.title[0] if args.title is not None else "" , args.legend_font_size, args.legend_position), bbox_inches='tight') - pdf.close() - except RuntimeError as e: - raise RuntimeError("During plotting of EPC curves, the following exception occured:\n%s" % e) - - - - if args.cmc or args.rr or args.dir: - logger.info("Loading CMC data on the development " + ("and on the evaluation set" if args.eval_files else "set")) - cmcs_dev = [score.cmc(os.path.join(args.directory, f)) for f in args.dev_files] - if args.eval_files: - cmcs_eval = [score.cmc(os.path.join(args.directory, f)) for f in args.eval_files] - - if args.cmc: - logger.info("Plotting CMC curves to file '%s'", args.cmc) - try: - # create a multi-page PDF for the CMC curve - pdf = PdfPages(args.cmc) - # create a separate figure for dev and eval - pdf.savefig(_plot_cmc(cmcs_dev, colors, args.legends, args.title[0] if args.title is not None else "CMC curve for development set", args.legend_font_size, args.legend_position), bbox_inches='tight') - if args.eval_files: - pdf.savefig(_plot_cmc(cmcs_eval, colors, args.legends, args.title[1] if args.title is not None else "CMC curve for evaluation set", args.legend_font_size, args.legend_position), bbox_inches='tight') - pdf.close() - except RuntimeError as e: - raise RuntimeError("During plotting of CMC curves, the following exception occured:\n%s\nUsually this happens when the label contains characters that LaTeX cannot parse." % e) - - if args.rr: - logger.info("Computing recognition rate on the development " + ("and on the evaluation set" if args.eval_files else "set")) - for i in range(len(cmcs_dev)): - rr = bob.measure.recognition_rate(cmcs_dev[i], args.thresholds[i]) - print("The Recognition Rate of the development set of '%s' is %2.3f%%" % (args.legends[i], rr * 100.)) - if args.eval_files: - rr = bob.measure.recognition_rate(cmcs_eval[i], args.thresholds[i]) - print("The Recognition Rate of the development set of '%s' is %2.3f%%" % (args.legends[i], rr * 100.)) - - if args.dir: - # compute false alarm values to evaluate - min_far = int(math.floor(math.log(args.min_far_value, 10))) - fars = [math.pow(10., i * 0.25) for i in range(min_far * 4, 0)] + [1.] - logger.info("Plotting DIR curves to file '%s'", args.dir) - try: - # create a multi-page PDF for the DIR curve - pdf = PdfPages(args.dir) - # create a separate figure for dev and eval - pdf.savefig(_plot_dir(cmcs_dev, fars, args.rank, colors, args.legends, args.title[0] if args.title is not None else "DIR curve for development set", args.legend_font_size, args.legend_position), bbox_inches='tight') - if args.eval_files: - pdf.savefig(_plot_dir(cmcs_eval, fars, args.rank, colors, args.legends, args.title[1] if args.title is not None else "DIR curve for evaluation set", args.legend_font_size, args.legend_position), bbox_inches='tight') - pdf.close() - except RuntimeError as e: - raise RuntimeError("During plotting of DIR curves, the following exception occured:\n%s" % e) diff --git a/doc/experiments.rst b/doc/experiments.rst index fca7ba7e02fd1b1aae83557e069b68b878f0acef..08be81fbe3324712479460615822ea734a28b56d 100644 --- a/doc/experiments.rst +++ b/doc/experiments.rst @@ -98,11 +98,11 @@ Running the experiment is then as simple as: .. 
note:: Chain loading is possible through configuration files, i.e., variables of each config is available during evaluation of the following config file. - + This allows us to spread our experiment setup in several configuration files and have a call similar to this:: - + $ verify.py config_1.py config_2.py config_n.py - + For more information see *Chain Loading* in :ref:`bob.extension.config`. @@ -114,7 +114,7 @@ By default, you can find them in a sub-directory the ``result`` directory, but y .. note:: At Idiap_, the default result directory differs, see ``verify.py --help`` for your directory. - + .. _bob.bio.base.command_line: @@ -155,13 +155,20 @@ However, to be consistent, throughout this documentation we document the options Evaluating Experiments ---------------------- -After the experiment has finished successfully, one or more text file containing -all the scores are written. In this section, commands that helps to quickly -evaluate a set of scores by generating metrics or plots are presented here. -The scripts take as input either a 4-column or 5-column data format as specified -in the documentation of :py:func:`bob.bio.base.score.load.four_column` or +After the experiment has finished successfully, one or more text file +containing all the scores are written. In this section, commands that helps to +quickly evaluate a set of scores by generating metrics or plots are presented +here. The scripts take as input either a 4-column or 5-column data format as +specified in the documentation of +:py:func:`bob.bio.base.score.load.four_column` or :py:func:`bob.bio.base.score.load.five_column`. +Please note that there exists another file called ``Experiment.info`` inside +the result directory. This file is a pure text file and contains the complete +configuration of the experiment. With this configuration it is possible to +inspect all default parameters of the algorithms, and even to re-run the exact +same experiment. + Metrics ======= @@ -213,9 +220,9 @@ For example: Plots ===== -Customizable plotting commands are available in the :py:mod:`bob.bio.base` module. -They take a list of development and/or evaluation files and generate a single PDF -file containing the plots. Available plots are: +Customizable plotting commands are available in the :py:mod:`bob.bio.base` +module. They take a list of development and/or evaluation files and generate a +single PDF file containing the plots. Available plots are: * ``roc`` (receiver operating characteristic) @@ -237,7 +244,7 @@ For example, to generate a CMC curve from development and evaluation datasets: .. code-block:: sh - $bob bio cmc --output 'my_cmc.pdf' dev-1.txt eval-1.txt + $bob bio cmc -v --output 'my_cmc.pdf' dev-1.txt eval-1.txt dev-2.txt eval-2.txt where `my_cmc.pdf` will contain CMC curves for the two experiments. @@ -248,39 +255,27 @@ where `my_cmc.pdf` will contain CMC curves for the two experiments. different plots. You can force gather everything in the same plot using ``--no-split`` option. +.. note:: + The ``--figsize`` and ``--style`` options are two powerful options that can + dramatically change the appearance of your figures. Try them! (e.g. + ``--figsize 12,10 --style grayscale``) + Evaluate ======== A convenient command `evaluate` is provided to generate multiple metrics and -plots for a list of experiments. It generates two `metrics` outputs with EER, -HTER, Cllr, minDCF criteria along with `roc`, `det`, `epc`, `hist` plots for each -experiment. For example: +plots for a list of experiments. 
It generates two `metrics` outputs with EER, +HTER, Cllr, minDCF criteria along with `roc`, `det`, `epc`, `hist` plots for +each experiment. For example: .. code-block:: sh - $bob bio evaluate -l 'my_metrics.txt' -o 'my_plots.pdf' {sys1, sys2}/ + $bob bio evaluate -v -l 'my_metrics.txt' -o 'my_plots.pdf' {sys1, sys2}/ {eval,dev} will output metrics and plots for the two experiments (dev and eval pairs) in `my_metrics.txt` and `my_plots.pdf`, respectively. -Evaluate script (deprecated) -============================ - -After the experiment has finished successfully, one or more text file containing all the scores are written. -To evaluate the experiment, you can use the generic ``evaluate.py`` script, which has properties for all prevalent evaluation types, such as CMC, DIR, ROC and DET plots, as well as computing recognition rates, EER/HTER, Cllr and minDCF. -Additionally, a combination of different algorithms can be plotted into the same files. -Just specify all the score files that you want to evaluate using the ``--dev-files`` option, and possible legends for the plots (in the same order) using the ``--legends`` option, and the according plots will be generated. -For example, to create a ROC curve for the experiment above, use: - -.. code-block:: sh - - $ evaluate.py --dev-files results/pca-experiment/male/nonorm/scores-dev --legend MOBIO --roc MOBIO_MALE_ROC.pdf -vv - - -Please note that there exists another file called ``Experiment.info`` inside the result directory. -This file is a pure text file and contains the complete configuration of the experiment. -With this configuration it is possible to inspect all default parameters of the algorithms, and even to re-run the exact same experiment. .. _running_in_parallel:
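Usage sketch for the commands documented above (score-file paths are placeholders; every option shown — ``-v``, ``-o``, ``-l``, ``--no-evaluation`` — is taken from the examples in this patch):

    $ bob bio metrics -v --no-evaluation scores-dev

    $ bob bio roc -v -o my_roc.pdf sys1/scores-dev sys1/scores-eval sys2/scores-dev sys2/scores-eval

    $ bob bio evaluate -v -l my_metrics.txt -o my_plots.pdf sys1/scores-dev sys1/scores-eval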