diff --git a/bob/bio/base/script/commands.py b/bob/bio/base/script/commands.py
index 5eb7554b04e6531f5ac2b5030ac599cf728e938b..ae944eb23b7d3d8f598309435b6e429a9cf358d2 100644
--- a/bob/bio/base/script/commands.py
+++ b/bob/bio/base/script/commands.py
@@ -22,6 +22,7 @@ def rank_option(**kwargs):
             callback=callback, show_default=True, **kwargs)(func)
     return custom_rank_option
 
+
 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.table_option()
@@ -39,10 +40,10 @@ def metrics(ctx, scores, evaluation, **kargs):
     """Prints a single output line that contains all info for a given
     criterion (eer, min-hter, far, mindcf, cllr, rr).
 
-    You need to provide one or more development score file(s) for each experiment.
-    You can also provide eval files along with dev files. If only dev-scores
-    are used, the flag `--no-evaluation` must be used.
-    is required in that case. Files must be 4- or 5- columns format, see
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide eval files along with dev files. If only
+    dev-scores are used, the flag `--no-evaluation` is required. Files must
+    be in 4- or 5-column format, see
     :py:func:`bob.bio.base.score.load.four_column` and
     :py:func:`bob.bio.base.score.load.five_column` for details.
 
@@ -65,6 +66,7 @@ def metrics(ctx, scores, evaluation, **kargs):
         process = bio_figure.Metrics(ctx, scores, evaluation, load.split)
     process.run()
 
+
 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.title_option()
@@ -92,24 +94,25 @@ def roc(ctx, scores, evaluation, **kargs):
     false non match rate on the vertical axis.  The values for the axis will be
     computed using :py:func:`bob.measure.roc`.
 
-    You need to provide one or more development score file(s) for each experiment.
-    You can also provide eval files along with dev files. If only dev-scores
-    are used, the flag `--no-evaluation` must be used.
-    is required in that case. Files must be 4- or 5- columns format, see
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide eval files along with dev files. If only
+    dev-scores are used, the flag `--no-evaluation` is required. Files must
+    be in 4- or 5-column format, see
     :py:func:`bob.bio.base.score.load.four_column` and
     :py:func:`bob.bio.base.score.load.five_column` for details.
 
     Examples:
-        $ bob bio roc dev-scores
+        $ bob bio roc -v dev-scores
 
-        $ bob bio roc dev-scores1 eval-scores1 dev-scores2
+        $ bob bio roc -v dev-scores1 eval-scores1 dev-scores2
         eval-scores2
 
-        $ bob bio roc -o my_roc.pdf dev-scores1 eval-scores1
+        $ bob bio roc -v -o my_roc.pdf dev-scores1 eval-scores1
     """
     process = bio_figure.Roc(ctx, scores, evaluation, load.split)
     process.run()
 
+
 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.title_option()
@@ -135,24 +138,25 @@ def det(ctx, scores, evaluation, **kargs):
     modified ROC curve which plots error rates on both axes
     (false positives on the x-axis and false negatives on the y-axis)
 
-    You need to provide one or more development score file(s) for each experiment.
-    You can also provide eval files along with dev files. If only dev-scores
-    are used, the flag `--no-evaluation` must be used.
-    is required in that case. Files must be 4- or 5- columns format, see
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide eval files along with dev files. If only
+    dev-scores are used, the flag `--no-evaluation` is required. Files must
+    be in 4- or 5-column format, see
     :py:func:`bob.bio.base.score.load.four_column` and
     :py:func:`bob.bio.base.score.load.five_column` for details.
 
     Examples:
-        $ bob bio det dev-scores
+        $ bob bio det -v dev-scores
 
-        $ bob bio det dev-scores1 eval-scores1 dev-scores2
+        $ bob bio det -v dev-scores1 eval-scores1 dev-scores2
         eval-scores2
 
-        $ bob bio det -o my_det.pdf dev-scores1 eval-scores1
+        $ bob bio det -v -o my_det.pdf dev-scores1 eval-scores1
     """
     process = bio_figure.Det(ctx, scores, evaluation, load.split)
     process.run()
 
+
 @click.command()
 @common_options.scores_argument(min_arg=1, force_eval=True, nargs=-1)
 @common_options.title_option()
@@ -177,13 +181,14 @@ def epc(ctx, scores, **kargs):
     :py:func:`bob.bio.base.score.load.five_column` for details.
 
     Examples:
-        $ bob bio epc dev-scores eval-scores
+        $ bob bio epc -v dev-scores eval-scores
 
-        $ bob bio epc -o my_epc.pdf dev-scores1 eval-scores1
+        $ bob bio epc -v -o my_epc.pdf dev-scores1 eval-scores1
     """
     process = measure_figure.Epc(ctx, scores, True, load.split)
     process.run()
 
+
 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.title_option()
@@ -202,30 +207,31 @@ def epc(ctx, scores, **kargs):
 @click.pass_context
 def cmc(ctx, scores, evaluation, **kargs):
     """Plot CMC (cumulative match characteristic curve):
-    graphical presentation of results of an identification task eval,
-    plotting rank values on the x-axis and the probability of correct identification
-    at or below that rank on the y-axis. The values for the axis will be
-    computed using :py:func:`bob.measure.cmc`.
-
-    You need to provide one or more development score file(s) for each experiment.
-    You can also provide eval files along with dev files. If only dev-scores
-    are used, the flag `--no-evaluation` must be used.
-    is required in that case. Files must be 4- or 5- columns format, see
+    graphical presentation of results of an identification task evaluation,
+    plotting rank values on the x-axis and the probability of correct
+    identification at or below that rank on the y-axis. The values for the
+    axis will be computed using :py:func:`bob.measure.cmc`.
+
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide eval files along with dev files. If only
+    dev-scores are used, the flag `--no-evaluation` is required. Files must
+    be in 4- or 5-column format, see
     :py:func:`bob.bio.base.score.load.four_column` and
     :py:func:`bob.bio.base.score.load.five_column` for details.
 
 
     Examples:
-        $ bob bio cmc dev-scores
+        $ bob bio cmc -v dev-scores
 
-        $ bob bio cmc dev-scores1 eval-scores1 dev-scores2
+        $ bob bio cmc -v dev-scores1 eval-scores1 dev-scores2
         eval-scores2
 
-        $ bob bio cmc -o my_roc.pdf dev-scores1 eval-scores1
+        $ bob bio cmc -v -o my_roc.pdf dev-scores1 eval-scores1
     """
     process = bio_figure.Cmc(ctx, scores, evaluation, load.cmc)
     process.run()
 
+
 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.title_option()
@@ -246,37 +252,38 @@ def cmc(ctx, scores, evaluation, **kargs):
 def dir(ctx, scores, evaluation, **kargs):
     """Plots the Detection & Identification Rate curve over the FAR
 
-    This curve is designed to be used in an open set identification protocol, and
-    defined in Chapter 14.1 of [LiJain2005]_.  It requires to have at least one
-    open set probe item, i.e., with no corresponding gallery, such that the
+    This curve is designed to be used in an open set identification protocol,
+    and is defined in Chapter 14.1 of [LiJain2005]_.  It requires at least one
+    open set probe item, i.e., one with no corresponding gallery, such that the
     positives for that pair are ``None``.
 
-    The detection and identification curve first computes FAR thresholds based on
-    the out-of-set probe scores (negative scores).  For each probe item, the
+    The detection and identification curve first computes FAR thresholds based
+    on the out-of-set probe scores (negative scores).  For each probe item, the
     **maximum** negative score is used.  Then, it plots the detection and
     identification rates for those thresholds, which are based on the in-set
     probe scores only. See [LiJain2005]_ for more details.
 
     .. [LiJain2005] **Stan Li and Anil K. Jain**, *Handbook of Face Recognition*, Springer, 2005
 
-    You need to provide one or more development score file(s) for each experiment.
-    You can also provide eval files along with dev files. If only dev-scores
-    are used, the flag `--no-evaluation` must be used.
-    is required in that case. Files must be 4- or 5- columns format, see
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide eval files along with dev files. If only
+    dev-scores are used, the flag `--no-evaluation` is required. Files must
+    be in 4- or 5-column format, see
     :py:func:`bob.bio.base.score.load.four_column` and
     :py:func:`bob.bio.base.score.load.five_column` for details.
 
     Examples:
-        $ bob bio dir dev-scores
+        $ bob bio dir -v dev-scores
 
-        $ bob bio dir dev-scores1 eval-scores1 dev-scores2
+        $ bob bio dir -v dev-scores1 eval-scores1 dev-scores2
         eval-scores2
 
-        $ bob bio dir -o my_roc.pdf dev-scores1 eval-scores1
+        $ bob bio dir -v -o my_roc.pdf dev-scores1 eval-scores1
     """
     process = bio_figure.Dir(ctx, scores, evaluation, load.cmc)
     process.run()
 
+
 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.title_option()
@@ -298,10 +305,10 @@ def hist(ctx, scores, evaluation, **kwargs):
     """ Plots histograms of positive and negatives along with threshold
     criterion.
 
-    You need to provide one or more development score file(s) for each experiment.
-    You can also provide eval files along with dev files. If only dev-scores
-    are used, the flag `--no-evaluation` must be used.
-    is required in that case. Files must be 4- or 5- columns format, see
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide eval files along with dev files. If only
+    dev-scores are used, the flag `--no-evaluation` is required. Files must
+    be in 4- or 5-column format, see
     :py:func:`bob.bio.base.score.load.four_column` and
     :py:func:`bob.bio.base.score.load.five_column` for details.
 
@@ -311,16 +318,17 @@ def hist(ctx, scores, evaluation, **kwargs):
     as well, use ``--show-dev`` option.
 
     Examples:
-        $ bob bio hist dev-scores
+        $ bob bio hist -v dev-scores
 
-        $ bob bio hist dev-scores1 eval-scores1 dev-scores2
+        $ bob bio hist -v dev-scores1 eval-scores1 dev-scores2
         eval-scores2
 
-        $ bob bio hist --criterion --show-dev min-hter dev-scores1 eval-scores1
+        $ bob bio hist -v --criterion min-hter --show-dev dev-scores1 eval-scores1
     """
     process = bio_figure.Hist(ctx, scores, evaluation, load.split)
     process.run()
 
+
 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.legends_option()
@@ -352,10 +360,10 @@ def evaluate(ctx, scores, evaluation, **kwargs):
     4. Plots ROC, EPC, DET, score distributions
        curves to a multi-page PDF file
 
-    You need to provide one or more development score file(s) for each experiment.
-    You can also provide eval files along with dev files. If only dev-scores
-    are used, the flag `--no-evaluation` must be used.
-    is required in that case. Files must be 4- or 5- columns format, see
+    You need to provide one or more development score file(s) for each
+    experiment. You can also provide eval files along with dev files. If only
+    dev-scores are used, the flag `--no-evaluation` is required. Files must
+    be in 4- or 5-column format, see
     :py:func:`bob.bio.base.score.load.four_column` and
     :py:func:`bob.bio.base.score.load.five_column` for details.
 
@@ -366,11 +374,11 @@ def evaluate(ctx, scores, evaluation, **kwargs):
     * evaluation scores
 
     Examples:
-        $ bob bio evaluate dev-scores
+        $ bob bio evaluate -v dev-scores
 
-        $ bob bio evaluate -l metrics.txt -o my_plots.pdf dev-scores eval-scores
+        $ bob bio evaluate -v -l metrics.txt -o my_plots.pdf dev-scores eval-scores
 
-        $ bob bio evaluate -o my_plots.pdf /path/to/syst-{1,2,3}/{dev,eval}-scores
+        $ bob bio evaluate -v -o my_plots.pdf /path/to/syst-{1,2,3}/{dev,eval}-scores
     '''
     log_str = ''
     if 'log' in ctx.meta and ctx.meta['log'] is not None:
@@ -387,8 +395,8 @@ def evaluate(ctx, scores, evaluation, **kwargs):
     ctx.meta['criterion'] = 'min-hter'  # no criterion passed in evaluate
     ctx.invoke(metrics, scores=scores, evaluation=evaluation)
     if 'far_value' in ctx.meta and ctx.meta['far_value'] is not None:
-        click.echo("Computing metrics with FAR=%f%s..." %\
-        (ctx.meta['far_value'], log_str))
+        click.echo("Computing metrics with FAR=%f%s..." %
+                   (ctx.meta['far_value'], log_str))
         ctx.meta['criterion'] = 'far'  # no criterion passed in evaluate
         ctx.invoke(metrics, scores=scores, evaluation=evaluation)
 
@@ -409,14 +417,14 @@ def evaluate(ctx, scores, evaluation, **kwargs):
         click.echo("Starting evaluate with dev scores only...")
 
     click.echo("Generating ROC in %s..." % ctx.meta['output'])
-    ctx.forward(roc) # use class defaults plot settings
+    ctx.forward(roc)  # use the class default plot settings
 
     click.echo("Generating DET in %s..." % ctx.meta['output'])
-    ctx.forward(det) # use class defaults plot settings
+    ctx.forward(det)  # use the class default plot settings
 
     if evaluation:
         click.echo("Generating EPC in %s..." % ctx.meta['output'])
-        ctx.forward(epc) # use class defaults plot settings
+        ctx.forward(epc)  # use the class default plot settings
 
     # the last one closes the file
     ctx.meta['closef'] = True
diff --git a/bob/bio/base/script/evaluate.py b/bob/bio/base/script/evaluate.py
deleted file mode 100644
index 5ec3b2467b111886272565a0b53ead079b3937cc..0000000000000000000000000000000000000000
--- a/bob/bio/base/script/evaluate.py
+++ /dev/null
@@ -1,472 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-
-"""This script evaluates the given score files and computes EER, HTER.
-It also is able to plot CMC and ROC curves.
-You can set the environment variable BOB_NO_STYLE_CHANGES to any value to avoid
-this script from changing the matplotlib style values. """
-
-from __future__ import print_function
-
-# matplotlib stuff
-import matplotlib
-from matplotlib import pyplot
-pyplot.switch_backend('pdf')  # switch to non-X backend
-from matplotlib.backends.backend_pdf import PdfPages
-
-# import bob.measure after matplotlib, so that it cannot define the backend
-
-import argparse
-import numpy
-import math
-import os
-
-import bob.measure
-from .. import score
-
-
-
-if not os.environ.get('BOB_NO_STYLE_CHANGES'):
-  # make the fig size smaller so that everything becomes bigger
-  matplotlib.rc('figure', figsize=(4, 3))
-
-
-import bob.core
-logger = bob.core.log.setup("bob.bio.base")
-
-
-def command_line_arguments(command_line_parameters):
-  """Parse the program options"""
-
-  # set up command line parser
-  parser = argparse.ArgumentParser(description=__doc__,
-      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-
-  parser.add_argument('-d', '--dev-files', required=True, nargs='+', help = "A list of score files of the development set.")
-  parser.add_argument('-e', '--eval-files', nargs='+', help = "A list of score files of the evaluation set; if given it must be the same number of files as the --dev-files.")
-
-  parser.add_argument('-s', '--directory', default = '.', help = "A directory, where to find the --dev-files and the --eval-files")
-
-  parser.add_argument('-c', '--criterion', choices = ('EER', 'HTER', 'FAR'), help = "If given, the threshold of the development set will be computed with this criterion.")
-  parser.add_argument('-f', '--far-value', type=float, default=0.001, help = "The FAR value for which to evaluate (only for --criterion FAR)")
-  parser.add_argument('-x', '--cllr', action = 'store_true', help = "If given, Cllr and minCllr will be computed.")
-  parser.add_argument('-m', '--mindcf', action = 'store_true', help = "If given, minDCF will be computed.")
-  parser.add_argument('--cost', default=0.99,  help='Cost for FAR in minDCF')
-  parser.add_argument('-r', '--rr', action = 'store_true', help = "If given, the Recognition Rate will be computed.")
-  parser.add_argument('-o', '--rank', type=int, default=1, help = "The rank for which to plot the DIR curve")
-  parser.add_argument('-t', '--thresholds', type=float, nargs='+', help = "If given, the Recognition Rate will incorporate an Open Set handling, rejecting all scores that are below the given threshold; when multiple thresholds are given, they are applied in the same order as the --dev-files.")
-  parser.add_argument('-l', '--legends', nargs='+', help = "A list of legend strings used for ROC, CMC and DET plots; if given, must be the same number than --dev-files.")
-  parser.add_argument('-F', '--legend-font-size', type=int, default=10, help = "Set the font size of the legends.")
-  parser.add_argument('-P', '--legend-position', type=int, help = "Set the font size of the legends.")
-  parser.add_argument('-T', '--title', nargs = '+', help = "Overwrite the default title of the plot for development (and evaluation) set")
-  parser.add_argument('-R', '--roc', help = "If given, ROC curves will be plotted into the given pdf file.")
-  parser.add_argument('-D', '--det', help = "If given, DET curves will be plotted into the given pdf file.")
-  parser.add_argument('-C', '--cmc', help = "If given, CMC curves will be plotted into the given pdf file.")
-  parser.add_argument('-O', '--dir', help = "If given, DIR curves will be plotted into the given pdf file; This is an open-set measure, which cannot be applied to closed set score files.")
-  parser.add_argument('-E', '--epc', help = "If given, EPC curves will be plotted into the given pdf file. For this plot --eval-files is mandatory.")
-  parser.add_argument('-M', '--min-far-value', type=float, default=1e-4, help = "Select the minimum FAR value used in ROC plots; should be a power of 10.")
-  parser.add_argument('-L', '--far-line-at', type=float, help = "If given, draw a veritcal line at this FAR value in the ROC plots.")
-
-  # add verbose option
-  bob.core.log.add_command_line_option(parser)
-
-  # parse arguments
-  args = parser.parse_args(command_line_parameters)
-
-  # set verbosity level
-  bob.core.log.set_verbosity_level(logger, args.verbose)
-
-  # some sanity checks:
-  for f in args.dev_files + (args.eval_files or []):
-    real_file = os.path.join(args.directory, f)
-    if not os.path.exists(real_file):
-      raise ValueError("The provided score file '%s' does not exist" % real_file)
-
-  if args.eval_files is not None and len(args.dev_files) != len(args.eval_files):
-    logger.error("The number of --dev-files (%d) and --eval-files (%d) are not identical", len(args.dev_files), len(args.eval_files))
-
-  # update legends when they are not specified on command line
-  if args.legends is None:
-    args.legends = [f.replace('_', '-') for f in args.dev_files]
-    logger.warn("Legends are not specified; using legends estimated from --dev-files: %s", args.legends)
-
-  # check that the legends have the same length as the dev-files
-  if len(args.dev_files) != len(args.legends):
-    logger.error("The number of --dev-files (%d) and --legends (%d) are not identical", len(args.dev_files), len(args.legends))
-
-  if args.thresholds is not None:
-    if len(args.thresholds) == 1:
-      args.thresholds = args.thresholds * len(args.dev_files)
-    elif len(args.thresholds) != len(args.dev_files):
-      logger.error("If given, the number of --thresholds imust be either 1, or the same as --dev-files (%d), but it is %d", len(args.dev_files), len(args.thresholds))
-  else:
-    args.thresholds = [None] * len(args.dev_files)
-
-  if args.title is not None:
-    if args.eval_files is None and len(args.title) != 1:
-      logger.warning("Ignoring the title for the evaluation set, as no evaluation set is given")
-    if args.eval_files is not None and len(args.title) < 2:
-      logger.error("The title for the evaluation set is not specified")
-
-  return args
-
-def _add_far_labels(min_far):
-  # compute and apply tick marks
-  assert min_far > 0
-  ticks = [min_far]
-  while ticks[-1] < 1.: ticks.append(ticks[-1] * 10.)
-  pyplot.xticks(ticks)
-  pyplot.axis([min_far, 1., -0.01, 1.01])
-
-
-
-def _plot_roc(frrs, colors, labels, title, fontsize=10, position=None, farfrrs=None, min_far=None):
-  if position is None: position = 'lower right'
-  figure = pyplot.figure()
-
-  # plot FAR and CAR for each algorithm
-  for i in range(len(frrs)):
-    pyplot.semilogx([f for f in frrs[i][0]], [1. - f for f in frrs[i][1]], color=colors[i], label=labels[i])
-    if isinstance(farfrrs, list):
-      pyplot.plot(farfrrs[i][0], (1.-farfrrs[i][1]), 'o', color=colors[i], markeredgecolor=colors[i])
-
-  # plot vertical bar, if desired
-  if farfrrs is not None:
-    if isinstance(farfrrs, float):
-      pyplot.plot([farfrrs,farfrrs],[0.,1.], "--", color='black')
-    else:
-      pyplot.plot([x[0] for x in farfrrs], [(1.-x[1]) for x in farfrrs], '--', color='black')
-
-  _add_far_labels(min_far)
-
-  # set label, legend and title
-  pyplot.xlabel('FMR')
-  pyplot.ylabel('1 - FNMR')
-  pyplot.grid(True, color=(0.6,0.6,0.6))
-  pyplot.legend(loc=position, prop = {'size':fontsize})
-  pyplot.title(title)
-
-  return figure
-
-
-def _plot_det(dets, colors, labels, title, fontsize=10, position=None):
-  if position is None: position = 'upper right'
-  # open new page for current plot
-  figure = pyplot.figure(figsize=(matplotlib.rcParams['figure.figsize'][0],
-                                  matplotlib.rcParams['figure.figsize'][0] * 0.975))
-  pyplot.grid(True)
-
-  # plot the DET curves
-  for i in range(len(dets)):
-    pyplot.plot(dets[i][0], dets[i][1], color=colors[i], label=labels[i])
-
-  # change axes accordingly
-  det_list = [0.0002, 0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 0.7, 0.9, 0.95]
-  ticks = [bob.measure.ppndf(d) for d in det_list]
-  labels = [("%.5f" % d).rstrip('0').rstrip('.') for d in det_list]
-  pyplot.xticks(ticks, [l if i % 2 else "" for i,l in enumerate(labels)])
-  pyplot.yticks(ticks, labels)
-  pyplot.axis((ticks[0], ticks[-1], ticks[0], ticks[-1]))
-
-  pyplot.xlabel('FMR')
-  pyplot.ylabel('FNMR')
-  pyplot.legend(loc=position, prop = {'size':fontsize})
-  pyplot.title(title)
-
-  return figure
-
-
-def _plot_cmc(cmcs, colors, labels, title, fontsize=10, position=None):
-  if position is None: position = 'lower right'
-  # open new page for current plot
-  figure = pyplot.figure()
-
-  max_R = 0
-  # plot the CMC curves
-  for i in range(len(cmcs)):
-    probs = bob.measure.cmc(cmcs[i])
-    R = len(probs)
-    pyplot.semilogx(range(1, R+1), probs, figure=figure, color=colors[i], label=labels[i])
-    max_R = max(R, max_R)
-
-  # change axes accordingly
-  ticks = [int(t) for t in pyplot.xticks()[0]]
-  pyplot.xlabel('Rank')
-  pyplot.ylabel('Probability')
-  pyplot.xticks(ticks, [str(t) for t in ticks])
-  pyplot.axis([0, max_R, -0.01, 1.01])
-  pyplot.legend(loc=position, prop = {'size':fontsize})
-  pyplot.title(title)
-
-  return figure
-
-
-def _plot_dir(cmc_scores, far_values, rank, colors, labels, title, fontsize=10, position=None):
-  if position is None: position = 'lower right'
-  # open new page for current plot
-  figure = pyplot.figure()
-
-  # for each probe, for which no positives exists, get the highest negative
-  # score; and sort them to compute the FAR thresholds
-  for i, cmcs in enumerate(cmc_scores):
-    negatives = sorted(max(neg) for neg, pos in cmcs if (pos is None or not numpy.array(pos).size) and neg is not None)
-    if not negatives:
-      raise ValueError("There need to be at least one pair with only negative scores")
-
-    # compute thresholds based on FAR values
-    thresholds = [bob.measure.far_threshold(negatives, [], v, True) for v in far_values]
-
-    # compute detection and identification rate based on the thresholds for
-    # the given rank
-    rates = [bob.measure.detection_identification_rate(cmcs, t, rank) for t in thresholds]
-
-    # plot DIR curve
-    pyplot.semilogx(far_values, rates, figure=figure, color=colors[i], label=labels[i])
-
-  # finalize plot
-  _add_far_labels(far_values[0])
-
-  pyplot.xlabel('FAR')
-  pyplot.ylabel('DIR')
-  pyplot.legend(loc=position, prop = {'size':fontsize})
-  pyplot.title(title)
-
-  return figure
-
-
-def _plot_epc(scores_dev, scores_eval, colors, labels, title, fontsize=10, position=None):
-  if position is None: position = 'upper center'
-  # open new page for current plot
-  figure = pyplot.figure()
-
-  # plot the DET curves
-  for i in range(len(scores_dev)):
-    x,y = bob.measure.epc(scores_dev[i][0], scores_dev[i][1], scores_eval[i][0], scores_eval[i][1], 100)
-    pyplot.plot(x, y, color=colors[i], label=labels[i])
-
-  # change axes accordingly
-  pyplot.xlabel('alpha')
-  pyplot.ylabel('HTER')
-  pyplot.title(title)
-  pyplot.axis([-0.01, 1.01, -0.01, 0.51])
-  pyplot.grid(True)
-  pyplot.legend(loc=position, prop = {'size':fontsize})
-  pyplot.title(title)
-
-  return figure
-
-
-def remove_nan(scores):
-    """removes the NaNs from the scores"""
-    nans = numpy.isnan(scores)
-    sum_nans = sum(nans)
-    total = len(scores)
-    return scores[numpy.where(~nans)], sum_nans, total
-
-
-def get_fta(scores):
-    """calculates the Failure To Acquire (FtA) rate"""
-    fta_sum, fta_total = 0, 0
-    neg, sum_nans, total = remove_nan(scores[0])
-    fta_sum += sum_nans
-    fta_total += total
-    pos, sum_nans, total = remove_nan(scores[1])
-    fta_sum += sum_nans
-    fta_total += total
-    return (neg, pos, fta_sum * 100 / float(fta_total))
-
-
-def main(command_line_parameters=None):
-  """Reads score files, computes error measures and plots curves."""
-
-  args = command_line_arguments(command_line_parameters)
-
-  # get some colors for plotting
-  if len(args.dev_files) > 10:
-    cmap = pyplot.cm.get_cmap(name='magma')
-    colors = [cmap(i) for i in numpy.linspace(0, 1.0, len(args.dev_files) + 1)]
-  else:
-    # matplotlib 2.0 default color cycler list: Vega category10 palette
-    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
-              '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
-              '#bcbd22', '#17becf']
-
-  if args.criterion or args.roc or args.det or args.epc or args.cllr or args.mindcf:
-
-    # First, read the score files
-    logger.info("Loading %d score files of the development set", len(args.dev_files))
-    scores_dev = [score.split(os.path.join(args.directory, f)) for f in args.dev_files]
-    # remove nans
-    scores_dev = [get_fta(s) for s in scores_dev]
-
-    if args.eval_files:
-      logger.info("Loading %d score files of the evaluation set", len(args.eval_files))
-      scores_eval = [score.split(os.path.join(args.directory, f)) for f in args.eval_files]
-      # remove nans
-      scores_eval = [get_fta(s) for s in scores_eval]
-
-
-    if args.criterion:
-      logger.info("Computing %s on the development " % args.criterion + ("and HTER on the evaluation set" if args.eval_files else "set"))
-      for i in range(len(scores_dev)):
-        # compute threshold on development set
-        if args.criterion == 'FAR':
-          threshold = bob.measure.far_threshold(scores_dev[i][0], scores_dev[i][1], args.far_value/100.)
-        else:
-          threshold = {'EER': bob.measure.eer_threshold, 'HTER' : bob.measure.min_hter_threshold} [args.criterion](scores_dev[i][0], scores_dev[i][1])
-        # apply threshold to development set
-        far, frr = bob.measure.farfrr(scores_dev[i][0], scores_dev[i][1], threshold)
-        if args.criterion == 'FAR':
-          print("The FRR at FAR=%.1E of the development set of '%s' is %2.3f%% (CAR: %2.3f%%)" % (args.far_value, args.legends[i], frr * 100., 100.*(1-frr)))
-        else:
-          print("The %s of the development set of '%s' is %2.3f%%" % (args.criterion, args.legends[i], (far + frr) * 50.)) # / 2 * 100%
-        if args.eval_files:
-          # apply threshold to evaluation set
-          far, frr = bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold)
-          if args.criterion == 'FAR':
-            print("The FRR of the evaluation set of '%s' is %2.3f%% (CAR: %2.3f%%)" % (args.legends[i], frr * 100., 100.*(1-frr))) # / 2 * 100%
-          else:
-            print("The HTER of the evaluation set of '%s' is %2.3f%%" % (args.legends[i], (far + frr) * 50.)) # / 2 * 100%
-
-
-    if args.mindcf:
-      logger.info("Computing minDCF on the development " + ("and on the evaluation set" if args.eval_files else "set"))
-      for i in range(len(scores_dev)):
-        # compute threshold on development set
-        threshold = bob.measure.min_weighted_error_rate_threshold(scores_dev[i][0], scores_dev[i][1], args.cost)
-        # apply threshold to development set
-        far, frr = bob.measure.farfrr(scores_dev[i][0], scores_dev[i][1], threshold)
-        print("The minDCF of the development set of '%s' is %2.3f%%" % (args.legends[i], (args.cost * far + (1-args.cost) * frr) * 100. ))
-        if args.eval_files:
-          # compute threshold on evaluation set
-          threshold = bob.measure.min_weighted_error_rate_threshold(scores_eval[i][0], scores_eval[i][1], args.cost)
-          # apply threshold to evaluation set
-          far, frr = bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold)
-          print("The minDCF of the evaluation set of '%s' is %2.3f%%" % (args.legends[i], (args.cost * far + (1-args.cost) * frr) * 100. ))
-
-
-    if args.cllr:
-      logger.info("Computing Cllr and minCllr on the development " + ("and on the evaluation set" if args.eval_files else "set"))
-      for i in range(len(scores_dev)):
-        cllr = bob.measure.calibration.cllr(scores_dev[i][0], scores_dev[i][1])
-        min_cllr = bob.measure.calibration.min_cllr(scores_dev[i][0], scores_dev[i][1])
-        print("Calibration performance on development set of '%s' is Cllr %1.5f and minCllr %1.5f " % (args.legends[i], cllr, min_cllr))
-        if args.eval_files:
-          cllr = bob.measure.calibration.cllr(scores_eval[i][0], scores_eval[i][1])
-          min_cllr = bob.measure.calibration.min_cllr(scores_eval[i][0], scores_eval[i][1])
-          print("Calibration performance on evaluation set of '%s' is Cllr %1.5f and minCllr %1.5f" % (args.legends[i], cllr, min_cllr))
-
-
-    if args.roc:
-      logger.info("Computing CAR curves on the development " + ("and on the evaluation set" if args.eval_files else "set"))
-      min_far = int(math.floor(math.log(args.min_far_value, 10)))
-      fars = [math.pow(10., i * 0.25) for i in range(min_far * 4, 0)] + [1.]
-      frrs_dev = [bob.measure.roc_for_far(scores[0], scores[1], fars) for scores in scores_dev]
-      if args.eval_files:
-        frrs_eval = [bob.measure.roc_for_far(scores[0], scores[1], fars) for scores in scores_eval]
-
-      logger.info("Plotting ROC curves to file '%s'", args.roc)
-      try:
-        # create a multi-page PDF for the ROC curve
-        pdf = PdfPages(args.roc)
-        # create a separate figure for dev and eval
-        pdf.savefig(_plot_roc(
-            frrs_dev, colors, args.legends,
-            args.title[0] if args.title is not None else "ROC for development set",
-            args.legend_font_size, args.legend_position, args.far_line_at,
-            min_far=args.min_far_value), bbox_inches='tight')
-        del frrs_dev
-        if args.eval_files:
-          if args.far_line_at is not None:
-            farfrrs = []
-            for i in range(len(scores_dev)):
-              threshold = bob.measure.far_threshold(scores_dev[i][0], scores_dev[i][1], args.far_line_at)
-              farfrrs.append(bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold))
-          else:
-            farfrrs = None
-          pdf.savefig(_plot_roc(frrs_eval, colors, args.legends, args.title[1] if args.title is not None else "ROC for evaluation set", args.legend_font_size, args.legend_position, farfrrs, min_far=args.min_far_value), bbox_inches='tight')
-          del frrs_eval
-        pdf.close()
-      except RuntimeError as e:
-        raise RuntimeError("During plotting of ROC curves, the following exception occured:\n%s" % e)
-
-    if args.det:
-      logger.info("Computing DET curves on the development " + ("and on the evaluation set" if args.eval_files else "set"))
-      dets_dev = [bob.measure.det(scores[0], scores[1], 1000) for scores in scores_dev]
-      if args.eval_files:
-        dets_eval = [bob.measure.det(scores[0], scores[1], 1000) for scores in scores_eval]
-
-      logger.info("Plotting DET curves to file '%s'", args.det)
-      try:
-        # create a multi-page PDF for the DET curve
-        pdf = PdfPages(args.det)
-        # create a separate figure for dev and eval
-        pdf.savefig(_plot_det(dets_dev, colors, args.legends, args.title[0] if args.title is not None else "DET for development set", args.legend_font_size, args.legend_position), bbox_inches='tight')
-        del dets_dev
-        if args.eval_files:
-          pdf.savefig(_plot_det(dets_eval, colors, args.legends, args.title[1] if args.title is not None else "DET for evaluation set", args.legend_font_size, args.legend_position), bbox_inches='tight')
-          del dets_eval
-        pdf.close()
-      except RuntimeError as e:
-        raise RuntimeError("During plotting of DET curves, the following exception occured:\n%s" % e)
-
-
-    if args.epc:
-      logger.info("Plotting EPC curves to file '%s'", args.epc)
-
-      if not args.eval_files:
-        raise ValueError("To plot the EPC curve the evaluation scores are necessary. Please, set it with the --eval-files option.")
-
-      try:
-        # create a multi-page PDF for the EPC curve
-        pdf = PdfPages(args.epc)
-        pdf.savefig(_plot_epc(scores_dev, scores_eval, colors, args.legends, args.title[0] if args.title is not None else "" , args.legend_font_size, args.legend_position), bbox_inches='tight')
-        pdf.close()
-      except RuntimeError as e:
-        raise RuntimeError("During plotting of EPC curves, the following exception occured:\n%s" % e)
-
-
-
-  if args.cmc or args.rr or args.dir:
-    logger.info("Loading CMC data on the development " + ("and on the evaluation set" if args.eval_files else "set"))
-    cmcs_dev = [score.cmc(os.path.join(args.directory, f)) for f in args.dev_files]
-    if args.eval_files:
-      cmcs_eval = [score.cmc(os.path.join(args.directory, f)) for f in args.eval_files]
-
-    if args.cmc:
-      logger.info("Plotting CMC curves to file '%s'", args.cmc)
-      try:
-        # create a multi-page PDF for the CMC curve
-        pdf = PdfPages(args.cmc)
-        # create a separate figure for dev and eval
-        pdf.savefig(_plot_cmc(cmcs_dev, colors, args.legends, args.title[0] if args.title is not None else "CMC curve for development set", args.legend_font_size, args.legend_position), bbox_inches='tight')
-        if args.eval_files:
-          pdf.savefig(_plot_cmc(cmcs_eval, colors, args.legends, args.title[1] if args.title is not None else "CMC curve for evaluation set", args.legend_font_size, args.legend_position), bbox_inches='tight')
-        pdf.close()
-      except RuntimeError as e:
-        raise RuntimeError("During plotting of CMC curves, the following exception occured:\n%s\nUsually this happens when the label contains characters that LaTeX cannot parse." % e)
-
-    if args.rr:
-      logger.info("Computing recognition rate on the development " + ("and on the evaluation set" if args.eval_files else "set"))
-      for i in range(len(cmcs_dev)):
-        rr = bob.measure.recognition_rate(cmcs_dev[i], args.thresholds[i])
-        print("The Recognition Rate of the development set of '%s' is %2.3f%%" % (args.legends[i], rr * 100.))
-        if args.eval_files:
-          rr = bob.measure.recognition_rate(cmcs_eval[i], args.thresholds[i])
-          print("The Recognition Rate of the development set of '%s' is %2.3f%%" % (args.legends[i], rr * 100.))
-
-    if args.dir:
-      # compute false alarm values to evaluate
-      min_far = int(math.floor(math.log(args.min_far_value, 10)))
-      fars = [math.pow(10., i * 0.25) for i in range(min_far * 4, 0)] + [1.]
-      logger.info("Plotting DIR curves to file '%s'", args.dir)
-      try:
-        # create a multi-page PDF for the DIR curve
-        pdf = PdfPages(args.dir)
-        # create a separate figure for dev and eval
-        pdf.savefig(_plot_dir(cmcs_dev, fars, args.rank, colors, args.legends, args.title[0] if args.title is not None else "DIR curve for development set", args.legend_font_size, args.legend_position), bbox_inches='tight')
-        if args.eval_files:
-          pdf.savefig(_plot_dir(cmcs_eval, fars, args.rank, colors, args.legends, args.title[1] if args.title is not None else "DIR curve for evaluation set", args.legend_font_size, args.legend_position), bbox_inches='tight')
-        pdf.close()
-      except RuntimeError as e:
-        raise RuntimeError("During plotting of DIR curves, the following exception occured:\n%s" % e)
diff --git a/doc/experiments.rst b/doc/experiments.rst
index fca7ba7e02fd1b1aae83557e069b68b878f0acef..08be81fbe3324712479460615822ea734a28b56d 100644
--- a/doc/experiments.rst
+++ b/doc/experiments.rst
@@ -98,11 +98,11 @@ Running the experiment is then as simple as:
 .. note::
    Chain loading is possible through configuration files, i.e., variables of each
    config is available during evaluation of the following config file.
-   
+
    This allows us to spread our experiment setup in several configuration files and have a call similar to this::
-   
+
    $ verify.py config_1.py config_2.py config_n.py
-  
+
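+   As a purely hypothetical illustration of chain loading (the variable names
+   and values below are made up), ``config_1.py`` could define::
+
+      database = 'mobio-male'
+
+   and ``config_2.py`` could then reuse that variable::
+
+      sub_directory = database + '_pca'
+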
    For more information see *Chain Loading* in :ref:`bob.extension.config`.
 
 
@@ -114,7 +114,7 @@ By default, you can find them in a sub-directory the ``result`` directory, but y
 
 .. note::
    At Idiap_, the default result directory differs, see ``verify.py --help`` for your directory.
-   
+
 
 .. _bob.bio.base.command_line:
 
@@ -155,13 +155,20 @@ However, to be consistent, throughout this documentation we document the options
 Evaluating Experiments
 ----------------------
 
-After the experiment has finished successfully, one or more text file containing
-all the scores are written. In this section, commands that helps to quickly
-evaluate a set of scores by generating metrics or plots are presented here.
-The scripts take as input either a 4-column or 5-column data format as specified
-in the documentation of :py:func:`bob.bio.base.score.load.four_column` or 
+After the experiment has finished successfully, one or more text files
+containing all the scores are written. This section presents commands that
+help to quickly evaluate a set of scores by generating metrics or plots.
+The scripts take as input either a 4-column or 5-column data format as
+specified in the documentation of
+:py:func:`bob.bio.base.score.load.four_column` or
 :py:func:`bob.bio.base.score.load.five_column`.
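+
+As a rough sketch (the linked functions are the authoritative reference and
+the values below are made up), each line of a 4-column score file holds a
+claimed identity, the real identity, a test label and the score, for
+example::
+
+    model_3 client_3 probe_017 0.82
+    model_3 client_7 probe_042 -1.54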
 
+Please note that there exists another file called ``Experiment.info`` inside
+the result directory. This file is a pure text file and contains the complete
+configuration of the experiment. With this configuration it is possible to
+inspect all default parameters of the algorithms, and even to re-run the exact
+same experiment.
+
 Metrics
 =======
 
@@ -213,9 +220,9 @@ For example:
 Plots
 =====
 
-Customizable plotting commands are available in the :py:mod:`bob.bio.base` module.
-They take a list of development and/or evaluation files and generate a single PDF
-file containing the plots. Available plots are:
+Customizable plotting commands are available in the :py:mod:`bob.bio.base`
+module. They take a list of development and/or evaluation files and generate a
+single PDF file containing the plots. Available plots are:
 
 *  ``roc`` (receiver operating characteristic)
 
@@ -237,7 +244,7 @@ For example, to generate a CMC curve from development and evaluation datasets:
 
 .. code-block:: sh
 
-    $bob bio cmc --output 'my_cmc.pdf' dev-1.txt eval-1.txt 
+    $ bob bio cmc -v --output 'my_cmc.pdf' dev-1.txt eval-1.txt
     dev-2.txt eval-2.txt
 
 where `my_cmc.pdf` will contain CMC curves for the two experiments.
@@ -248,39 +255,27 @@ where `my_cmc.pdf` will contain CMC curves for the two experiments.
     different plots. You can force gather everything in the same plot using
     ``--no-split`` option.
 
+.. note::
+    The ``--figsize`` and ``--style`` options can dramatically change the
+    appearance of your figures. Try them! (e.g.
+    ``--figsize 12,10 --style grayscale``)
+
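+For instance, a hypothetical invocation (the score file names are
+placeholders) could look like:
+
+.. code-block:: sh
+
+    $ bob bio roc -v --figsize 12,10 --style grayscale dev-scores eval-scores
+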
 Evaluate
 ========
 
 A convenient command `evaluate` is provided to generate multiple metrics and
-plots for a list of experiments. It generates two `metrics` outputs with EER, 
-HTER, Cllr, minDCF criteria along with `roc`, `det`, `epc`, `hist` plots for each
-experiment. For example:
+plots for a list of experiments. It generates two `metrics` outputs with EER,
+HTER, Cllr, and minDCF criteria along with `roc`, `det`, `epc`, and `hist`
+plots for each experiment. For example:
 
 .. code-block:: sh
 
-    $bob bio evaluate -l 'my_metrics.txt' -o 'my_plots.pdf' {sys1, sys2}/
+    $ bob bio evaluate -v -l 'my_metrics.txt' -o 'my_plots.pdf' {sys1,sys2}/
     {eval,dev}
 
 will output metrics and plots for the two experiments (dev and eval pairs) in
 `my_metrics.txt` and `my_plots.pdf`, respectively.
 
-Evaluate script (deprecated)
-============================
-
-After the experiment has finished successfully, one or more text file containing all the scores are written.
-To evaluate the experiment, you can use the generic ``evaluate.py`` script, which has properties for all prevalent evaluation types, such as CMC, DIR, ROC and DET plots, as well as computing recognition rates, EER/HTER, Cllr and minDCF.
-Additionally, a combination of different algorithms can be plotted into the same files.
-Just specify all the score files that you want to evaluate using the ``--dev-files`` option, and possible legends for the plots (in the same order) using the ``--legends`` option, and the according plots will be generated.
-For example, to create a ROC curve for the experiment above, use:
-
-.. code-block:: sh
-
-   $ evaluate.py --dev-files results/pca-experiment/male/nonorm/scores-dev --legend MOBIO --roc MOBIO_MALE_ROC.pdf -vv
-
-
-Please note that there exists another file called ``Experiment.info`` inside the result directory.
-This file is a pure text file and contains the complete configuration of the experiment.
-With this configuration it is possible to inspect all default parameters of the algorithms, and even to re-run the exact same experiment.
 
 .. _running_in_parallel: