diff --git a/bob/bio/base/script/evaluate.py b/bob/bio/base/script/evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..df1e74887fe81811dd5c637791013afe1cb642d1
--- /dev/null
+++ b/bob/bio/base/script/evaluate.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Manuel Guenther <manuel.guenther@idiap.ch>
+# Tue Jul 2 14:52:49 CEST 2013
+#
+# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import print_function
+
+"""This script evaluates the given score files and computes EER and HTER.
+It is also able to plot ROC, DET and CMC curves."""
+
+import bob.measure
+
+import argparse
+import numpy, math
+import os
+
+# matplotlib stuff
+import matplotlib
+from matplotlib import pyplot
+from matplotlib.backends.backend_pdf import PdfPages
+
+# enable LaTeX interpreter
+matplotlib.rc('text', usetex=True)
+matplotlib.rc('font', family='serif')
+matplotlib.rc('lines', linewidth = 4)
+# increase the default font size
+matplotlib.rc('font', size=18)
+
+import bob.core
+logger = bob.core.log.setup("bob.bio.base")
+
+
+def command_line_arguments(command_line_parameters):
+  """Parse the program options"""
+
+  # set up command line parser
+  parser = argparse.ArgumentParser(description=__doc__,
+      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+  parser.add_argument('-d', '--dev-files', required=True, nargs='+', help = "A list of score files of the development set.")
+  parser.add_argument('-e', '--eval-files', nargs='+', help = "A list of score files of the evaluation set; if given, it must contain the same number of files as --dev-files.")
+
+  parser.add_argument('-s', '--directory', default = '.', help = "A directory where the --dev-files and the --eval-files can be found.")
+
+  parser.add_argument('-c', '--criterion', choices = ('EER', 'HTER'), help = "If given, the threshold of the development set will be computed with this criterion.")
+  parser.add_argument('-x', '--cllr', action = 'store_true', help = "If given, Cllr and minCllr will be computed.")
+  parser.add_argument('-m', '--mindcf', action = 'store_true', help = "If given, minDCF will be computed.")
+  parser.add_argument('--cost', default=0.99, type=float, help='Cost for FAR in minDCF.')
+  parser.add_argument('-r', '--rr', action = 'store_true', help = "If given, the Recognition Rate will be computed.")
+  parser.add_argument('-l', '--legends', nargs='+', help = "A list of legend strings used for ROC, CMC and DET plots; if given, it must contain the same number of entries as --dev-files.")
+  parser.add_argument('-F', '--legend-font-size', type=int, default=18, help = "Set the font size of the legends.")
+  parser.add_argument('-P', '--legend-position', type=int, help = "Set the position of the legends.")
+  parser.add_argument('-R', '--roc', help = "If given, ROC curves will be plotted into the given pdf file.")
+  parser.add_argument('-D', '--det', help = "If given, DET curves will be plotted into the given pdf file.")
+  parser.add_argument('-C', '--cmc', help = "If given, CMC curves will be plotted into the given pdf file.")
+  parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'), help="The style of the resulting score files; the default fits the usual output of FaceRecLib score files.")
+
+  parser.add_argument('--self-test', action='store_true', help=argparse.SUPPRESS)
+
+  # add verbose option
+  bob.core.log.add_command_line_option(parser)
+
+  # parse arguments
+  args = parser.parse_args(command_line_parameters)
+
+  # set verbosity level
+  bob.core.log.set_verbosity_level(logger, args.verbose)
+
+  # some sanity checks:
+  if args.eval_files is not None and len(args.dev_files) != len(args.eval_files):
+    logger.error("The number of --dev-files (%d) and --eval-files (%d) are not identical", len(args.dev_files), len(args.eval_files))
+
+  # update legends when they are not specified on command line
+  if args.legends is None:
+    args.legends = [f.replace('_', '-') for f in args.dev_files]
+    logger.warn("Legends are not specified; using legends estimated from --dev-files: %s", args.legends)
+
+  # check that the legends have the same length as the dev-files
+  if len(args.dev_files) != len(args.legends):
+    logger.error("The number of --dev-files (%d) and --legends (%d) are not identical", len(args.dev_files), len(args.legends))
+
+  return args
+
+
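+# Example invocation (illustrative only; the score file names are hypothetical
+# and must exist relative to --directory):
+#
+#   evaluate.py --dev-files scores-nonorm-dev --eval-files scores-nonorm-eval \
+#     --criterion EER --roc roc.pdf --det det.pdf --cmc cmc.pdf
+
+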
the given pdf file.") + parser.add_argument('-C', '--cmc', help = "If given, CMC curves will be plotted into the given pdf file.") + parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'), help="The style of the resulting score files. The default fits to the usual output of FaceRecLib score files.") + + parser.add_argument('--self-test', action='store_true', help=argparse.SUPPRESS) + + # add verbose option + bob.core.log.add_command_line_option(parser) + + # parse arguments + args = parser.parse_args(command_line_parameters) + + # set verbosity level + bob.core.log.set_verbosity_level(logger, args.verbose) + + + # some sanity checks: + if args.eval_files is not None and len(args.dev_files) != len(args.eval_files): + logger.error("The number of --dev-files (%d) and --eval-files (%d) are not identical", len(args.dev_files), len(args.eval_files)) + + # update legends when they are not specified on command line + if args.legends is None: + args.legends = [f.replace('_', '-') for f in args.dev_files] + logger.warn("Legends are not specified; using legends estimated from --dev-files: %s", args.legends) + + # check that the legends have the same length as the dev-files + if len(args.dev_files) != len(args.legends): + logger.error("The number of --dev-files (%d) and --legends (%d) are not identical", len(args.dev_files), len(args.legends)) + + return args + + +def _plot_roc(frrs, colors, labels, title, fontsize=18, position=None): + if position is None: position = 4 + figure = pyplot.figure() + # plot FAR and CAR for each algorithm + for i in range(len(frrs)): + pyplot.semilogx([100.0*f for f in frrs[i][0]], [100. - 100.0*f for f in frrs[i][1]], color=colors[i], lw=2, ms=10, mew=1.5, label=labels[i]) + + # finalize plot + pyplot.plot([0.1,0.1],[0,100], "--", color=(0.3,0.3,0.3)) + pyplot.axis([frrs[0][0][0]*100,100,0,100]) + pyplot.xticks((0.01, 0.1, 1, 10, 100), ('0.01', '0.1', '1', '10', '100')) + pyplot.xlabel('FAR (\%)') + pyplot.ylabel('CAR (\%)') + pyplot.grid(True, color=(0.6,0.6,0.6)) + pyplot.legend(loc=position, prop = {'size':fontsize}) + pyplot.title(title) + + return figure + + +def _plot_det(dets, colors, labels, title, fontsize=18, position=None): + if position is None: position = 1 + # open new page for current plot + figure = pyplot.figure(figsize=(8.2,8)) + + # plot the DET curves + for i in range(len(dets)): + pyplot.plot(dets[i][0], dets[i][1], color=colors[i], lw=2, ms=10, mew=1.5, label=labels[i]) + + # change axes accordingly + det_list = [0.0002, 0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 0.7, 0.9, 0.95] + ticks = [bob.measure.ppndf(d) for d in det_list] + labels = [("%.5f" % (d*100)).rstrip('0').rstrip('.') for d in det_list] + pyplot.xticks(ticks, labels) + pyplot.yticks(ticks, labels) + pyplot.axis((ticks[0], ticks[-1], ticks[0], ticks[-1])) + + pyplot.xlabel('FAR (\%)') + pyplot.ylabel('FRR (\%)') + pyplot.legend(loc=position, prop = {'size':fontsize}) + pyplot.title(title) + + return figure + +def _plot_cmc(cmcs, colors, labels, title, fontsize=18, position=None): + if position is None: position = 4 + # open new page for current plot + figure = pyplot.figure() + + max_x = 0 + # plot the DET curves + for i in range(len(cmcs)): + x = bob.measure.plot.cmc(cmcs[i], figure=figure, color=colors[i], lw=2, ms=10, mew=1.5, label=labels[i]) + max_x = max(x, max_x) + + # change axes accordingly + ticks = [int(t) for t in pyplot.xticks()[0]] + pyplot.xlabel('Rank') + pyplot.ylabel('Probability (\%)') + pyplot.xticks(ticks, [str(t) for t in 
+def _plot_det(dets, colors, labels, title, fontsize=18, position=None):
+  if position is None: position = 1
+  # open new page for current plot
+  figure = pyplot.figure(figsize=(8.2,8))
+
+  # plot the DET curves
+  for i in range(len(dets)):
+    pyplot.plot(dets[i][0], dets[i][1], color=colors[i], lw=2, ms=10, mew=1.5, label=labels[i])
+
+  # change axes accordingly
+  det_list = [0.0002, 0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 0.7, 0.9, 0.95]
+  ticks = [bob.measure.ppndf(d) for d in det_list]
+  labels = [("%.5f" % (d*100)).rstrip('0').rstrip('.') for d in det_list]
+  pyplot.xticks(ticks, labels)
+  pyplot.yticks(ticks, labels)
+  pyplot.axis((ticks[0], ticks[-1], ticks[0], ticks[-1]))
+
+  pyplot.xlabel('FAR (\%)')
+  pyplot.ylabel('FRR (\%)')
+  pyplot.legend(loc=position, prop = {'size':fontsize})
+  pyplot.title(title)
+
+  return figure
+
+
+def _plot_cmc(cmcs, colors, labels, title, fontsize=18, position=None):
+  if position is None: position = 4
+  # open new page for current plot
+  figure = pyplot.figure()
+
+  max_x = 0
+  # plot the CMC curves
+  for i in range(len(cmcs)):
+    x = bob.measure.plot.cmc(cmcs[i], figure=figure, color=colors[i], lw=2, ms=10, mew=1.5, label=labels[i])
+    max_x = max(x, max_x)
+
+  # change axes accordingly
+  ticks = [int(t) for t in pyplot.xticks()[0]]
+  pyplot.xlabel('Rank')
+  pyplot.ylabel('Probability (\%)')
+  pyplot.xticks(ticks, [str(t) for t in ticks])
+  pyplot.axis([0, max_x, 0, 100])
+  pyplot.legend(loc=position, prop = {'size':fontsize})
+  pyplot.title(title)
+
+  return figure
+
+
+def main(command_line_parameters=None):
+  """Reads score files, computes error measures and plots curves."""
+
+  args = command_line_arguments(command_line_parameters)
+
+  # get some colors for plotting
+  cmap = pyplot.cm.get_cmap(name='hsv')
+  colors = [cmap(i) for i in numpy.linspace(0, 1.0, len(args.dev_files)+1)]
+
+  if args.criterion or args.roc or args.det or args.cllr or args.mindcf:
+    score_parser = {'4column' : bob.measure.load.split_four_column, '5column' : bob.measure.load.split_five_column}[args.parser]
+
+    # First, read the score files
+    logger.info("Loading %d score files of the development set", len(args.dev_files))
+    scores_dev = [score_parser(os.path.join(args.directory, f)) for f in args.dev_files]
+
+    if args.eval_files:
+      logger.info("Loading %d score files of the evaluation set", len(args.eval_files))
+      scores_eval = [score_parser(os.path.join(args.directory, f)) for f in args.eval_files]
+
+    if args.criterion:
+      logger.info("Computing %s on the development " % args.criterion + ("and HTER on the evaluation set" if args.eval_files else "set"))
+      for i in range(len(scores_dev)):
+        # compute threshold on development set
+        threshold = {'EER': bob.measure.eer_threshold, 'HTER' : bob.measure.min_hter_threshold} [args.criterion](scores_dev[i][0], scores_dev[i][1])
+        # apply threshold to development set
+        far, frr = bob.measure.farfrr(scores_dev[i][0], scores_dev[i][1], threshold)
+        print("The %s of the development set of '%s' is %2.3f%%" % (args.criterion, args.legends[i], (far + frr) * 50.)) # / 2 * 100%
+        if args.eval_files:
+          # apply the same threshold to the evaluation set
+          far, frr = bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold)
+          print("The HTER of the evaluation set of '%s' is %2.3f%%" % (args.legends[i], (far + frr) * 50.)) # / 2 * 100%
+
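+    # minDCF: the decision cost function weights the two error rates as
+    #   DCF = cost * FAR + (1 - cost) * FRR
+    # and is reported at the threshold that minimizes it.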
+    if args.mindcf:
+      logger.info("Computing minDCF on the development " + ("and on the evaluation set" if args.eval_files else "set"))
+      for i in range(len(scores_dev)):
+        # compute threshold on development set
+        threshold = bob.measure.min_weighted_error_rate_threshold(scores_dev[i][0], scores_dev[i][1], args.cost)
+        # apply threshold to development set
+        far, frr = bob.measure.farfrr(scores_dev[i][0], scores_dev[i][1], threshold)
+        print("The minDCF of the development set of '%s' is %2.3f%%" % (args.legends[i], (args.cost * far + (1-args.cost) * frr) * 100.))
+        if args.eval_files:
+          # compute threshold on evaluation set
+          threshold = bob.measure.min_weighted_error_rate_threshold(scores_eval[i][0], scores_eval[i][1], args.cost)
+          # apply threshold to evaluation set
+          far, frr = bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold)
+          print("The minDCF of the evaluation set of '%s' is %2.3f%%" % (args.legends[i], (args.cost * far + (1-args.cost) * frr) * 100.))
+
+    if args.cllr:
+      logger.info("Computing Cllr and minCllr on the development " + ("and on the evaluation set" if args.eval_files else "set"))
+      for i in range(len(scores_dev)):
+        cllr = bob.measure.calibration.cllr(scores_dev[i][0], scores_dev[i][1])
+        min_cllr = bob.measure.calibration.min_cllr(scores_dev[i][0], scores_dev[i][1])
+        print("Calibration performance on the development set of '%s' is Cllr %1.5f and minCllr %1.5f" % (args.legends[i], cllr, min_cllr))
+        if args.eval_files:
+          cllr = bob.measure.calibration.cllr(scores_eval[i][0], scores_eval[i][1])
+          min_cllr = bob.measure.calibration.min_cllr(scores_eval[i][0], scores_eval[i][1])
+          print("Calibration performance on the evaluation set of '%s' is Cllr %1.5f and minCllr %1.5f" % (args.legends[i], cllr, min_cllr))
+
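+    # The ROC is sampled at FAR values from 1e-4 to 10^-0.25 in
+    # quarter-decade steps, plus FAR = 1 as the right end point.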
+    if args.roc:
+      logger.info("Computing CAR curves on the development " + ("and on the evaluation set" if args.eval_files else "set"))
+      fars = [math.pow(10., i * 0.25) for i in range(-16,0)] + [1.]
+      frrs_dev = [bob.measure.roc_for_far(scores[0], scores[1], fars) for scores in scores_dev]
+      if args.eval_files:
+        frrs_eval = [bob.measure.roc_for_far(scores[0], scores[1], fars) for scores in scores_eval]
+
+      logger.info("Plotting ROC curves to file '%s'", args.roc)
+      try:
+        # create a multi-page PDF for the ROC curves
+        pdf = PdfPages(args.roc)
+        # create a separate figure for dev and eval
+        pdf.savefig(_plot_roc(frrs_dev, colors, args.legends, "ROC curve for development set", args.legend_font_size, args.legend_position))
+        del frrs_dev
+        if args.eval_files:
+          pdf.savefig(_plot_roc(frrs_eval, colors, args.legends, "ROC curve for evaluation set", args.legend_font_size, args.legend_position))
+          del frrs_eval
+        pdf.close()
+      except RuntimeError as e:
+        raise RuntimeError("During plotting of ROC curves, the following exception occurred:\n%s\nUsually this happens when a legend label contains characters that LaTeX cannot parse." % e)
+
+    if args.det:
+      logger.info("Computing DET curves on the development " + ("and on the evaluation set" if args.eval_files else "set"))
+      dets_dev = [bob.measure.det(scores[0], scores[1], 1000) for scores in scores_dev]
+      if args.eval_files:
+        dets_eval = [bob.measure.det(scores[0], scores[1], 1000) for scores in scores_eval]
+
+      logger.info("Plotting DET curves to file '%s'", args.det)
+      try:
+        # create a multi-page PDF for the DET curves
+        pdf = PdfPages(args.det)
+        # create a separate figure for dev and eval
+        pdf.savefig(_plot_det(dets_dev, colors, args.legends, "DET plot for development set", args.legend_font_size, args.legend_position))
+        del dets_dev
+        if args.eval_files:
+          pdf.savefig(_plot_det(dets_eval, colors, args.legends, "DET plot for evaluation set", args.legend_font_size, args.legend_position))
+          del dets_eval
+        pdf.close()
+      except RuntimeError as e:
+        raise RuntimeError("During plotting of DET curves, the following exception occurred:\n%s\nUsually this happens when a legend label contains characters that LaTeX cannot parse." % e)
+
+  if args.cmc or args.rr:
+    logger.info("Loading CMC data on the development " + ("and on the evaluation set" if args.eval_files else "set"))
+    cmc_parser = {'4column' : bob.measure.load.cmc_four_column, '5column' : bob.measure.load.cmc_five_column}[args.parser]
+    cmcs_dev = [cmc_parser(os.path.join(args.directory, f)) for f in args.dev_files]
+    if args.eval_files:
+      cmcs_eval = [cmc_parser(os.path.join(args.directory, f)) for f in args.eval_files]
+
+    if args.cmc:
+      logger.info("Plotting CMC curves to file '%s'", args.cmc)
+      try:
+        # create a multi-page PDF for the CMC curves
+        pdf = PdfPages(args.cmc)
+        # create a separate figure for dev and eval
+        pdf.savefig(_plot_cmc(cmcs_dev, colors, args.legends, "CMC curve for development set", args.legend_font_size, args.legend_position))
+        if args.eval_files:
+          pdf.savefig(_plot_cmc(cmcs_eval, colors, args.legends, "CMC curve for evaluation set", args.legend_font_size, args.legend_position))
+        pdf.close()
+      except RuntimeError as e:
+        raise RuntimeError("During plotting of CMC curves, the following exception occurred:\n%s\nUsually this happens when a legend label contains characters that LaTeX cannot parse." % e)
+
+    if args.rr:
+      logger.info("Computing recognition rate on the development " + ("and on the evaluation set" if args.eval_files else "set"))
+      for i in range(len(cmcs_dev)):
+        rr = bob.measure.recognition_rate(cmcs_dev[i])
+        print("The Recognition Rate of the development set of '%s' is %2.3f%%" % (args.legends[i], rr * 100.))
+        if args.eval_files:
+          rr = bob.measure.recognition_rate(cmcs_eval[i])
+          print("The Recognition Rate of the evaluation set of '%s' is %2.3f%%" % (args.legends[i], rr * 100.))
diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py
index 4bf6310ffbde47882d392909ac9a6d485222001f..05d8feb23ade7c136eb60766eb907480a2d2cbf7 100644
--- a/bob/bio/base/test/test_scripts.py
+++ b/bob/bio/base/test/test_scripts.py
@@ -254,6 +254,30 @@ def test_verify_filelist():
   shutil.rmtree(test_dir)
 
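+# note: the evaluate script enables matplotlib's LaTeX rendering (usetex),
+# so this test assumes a working LaTeX installation is available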
+def test_evaluate():
+  # tests our 'evaluate' script using the reference files
+  test_dir = tempfile.mkdtemp(prefix='bobtest_')
+  reference_files = ('scores-nonorm-dev', 'scores-ztnorm-dev')
+  plots = [os.path.join(test_dir, '%s.pdf') % f for f in ['roc', 'det', 'cmc']]
+  parameters = [
+    '--dev-files', reference_files[0], reference_files[1],
+    '--eval-files', reference_files[0], reference_files[1],
+    '--directory', data_dir,
+    '--legends', 'no norm', 'ZT norm',
+    '--criterion', 'HTER',
+    '--roc', plots[0],
+    '--det', plots[1],
+    '--cmc', plots[2],
+  ]
+
+  # execute the script
+  from bob.bio.base.script.evaluate import main
+  main(parameters)
+  for i in range(3):
+    assert os.path.exists(plots[i])
+    os.remove(plots[i])
+  os.rmdir(test_dir)
+
 """
 def test11_baselines_api(self):
@@ -278,29 +302,6 @@ def test11_baselines_api(self):
   main(parameters)
 
-
-def test15_evaluate(self):
-  # tests our 'evaluate' script using the reference files
-  test_dir = tempfile.mkdtemp(prefix='bobtest_')
-  reference_files = ('scores-nonorm-dev', 'scores-ztnorm-dev')
-  plots = [os.path.join(test_dir, '%s.pdf')%f for f in ['roc', 'cmc', 'det']]
-  parameters = [
-    '--dev-files', reference_files[0], reference_files[1],
-    '--eval-files', reference_files[0], reference_files[1],
-    '--directory', os.path.join(base_dir, 'scripts'),
-    '--legends', 'no norm', 'ZT norm',
-    '--criterion', 'HTER',
-    '--roc', plots[0],
-    '--det', plots[1],
-    '--cmc', plots[2],
-  ]
-
-  # execute the script
-  from facereclib.script.evaluate import main
-  main(parameters)
-  for i in range(3):
-    self.assertTrue(os.path.exists(plots[i]))
-    os.remove(plots[i])
-  os.rmdir(test_dir)
 
 def test16_collect_results(self):