diff --git a/bob/bio/base/score/load.py b/bob/bio/base/score/load.py index 4c5c923fcdfc0cd94b53d18544bf9062ea5a325a..dc4b6fbf4c032d8ac0c4480a5cb37acd1bf53ca7 100644 --- a/bob/bio/base/score/load.py +++ b/bob/bio/base/score/load.py @@ -363,7 +363,6 @@ def cmc(filename, ncolumns=None): assert ncolumns == 5 return cmc_five_column(filename) - def load_score(filename, ncolumns=None, minimal=False, **kwargs): """Load scores using numpy.loadtxt and return the data as a numpy array. @@ -428,6 +427,33 @@ def load_score(filename, ncolumns=None, minimal=False, **kwargs): score_lines = numpy.array(score_lines, new_dtype) return score_lines +def load_files(filenames, func_load): + """Load a list of score files and return a list of (neg, pos) tuples + + Parameters + ---------- + + filenames : :any:`list` + list of file paths + func_load : + function that can read files in the list + + Returns + ------- + + :any:`list`: [(neg,pos)] A list of tuples, where each tuple contains the + ``negative`` and ``positive`` scores for each system/probe. + + """ + if filenames is None: + return None + res = [] + for filepath in filenames: + try: + res.append(func_load(filepath)) + except: + raise + return res def get_negatives_positives(score_lines): """Take the output of load_score and return negatives and positives. This diff --git a/bob/bio/base/script/commands.py b/bob/bio/base/script/commands.py new file mode 100644 index 0000000000000000000000000000000000000000..20cfffa5d512826cc34e4da931b5d0ad73d3bc58 --- /dev/null +++ b/bob/bio/base/script/commands.py @@ -0,0 +1,413 @@ +''' Click commands for ``bob.bio.base`` ''' + +import click +import bob.bio.base.script.figure as bio_figure +import bob.measure.script.figure as measure_figure +from ..score import load +from bob.measure.script import common_options +from bob.extension.scripts.click_helper import (verbosity_option, + open_file_mode_option) + + +def rank_option(**kwargs): + '''Get option for rank parameter''' + def custom_rank_option(func): + def callback(ctx, param, value): + value = 1 if value < 0 else value + ctx.meta['rank'] = value + return value + return click.option( + '-rk', '--rank', type=click.INT, default=1, + help='Rank for DIC', + callback=callback, show_default=True, **kwargs)(func) + return custom_rank_option + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.table_option() +@common_options.eval_option() +@common_options.output_log_metric_option() +@common_options.criterion_option(['eer', 'hter', 'far', 'mindcf', 'cllr', 'rr']) +@common_options.cost_option() +@common_options.thresholds_option() +@common_options.far_option() +@common_options.titles_option() +@open_file_mode_option() +@verbosity_option() +@click.pass_context +def metrics(ctx, scores, evaluation, **kargs): + """Prints a single output line that contains all info for a given + criterion (eer, hter, far, mindcf, cllr, rr). + + You need to provide one or more development score file(s) for each experiment. + You can also provide eval files along with dev files. If only dev-scores + are used, the flag `--no-evaluation` must be used. + Files must be in 4- or 5-column format, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. + + The resulting table format can be changed using the `--tablefmt` option. 
Default + formats are `rst` when output in the terminal and `latex` when + written in a log file (see `--log`) + + Examples: + $ bob bio metrics dev-scores + + $ bob bio metrics --no-evaluation dev-scores1 dev-scores2 + + $ bob bio metrics -l results.txt dev-scores1 eval-scores1 + + $ bob bio metrics {dev,eval}-scores1 {dev,eval}-scores2 + """ + if 'criter' in ctx.meta and ctx.meta['criter'] == 'rr': + process = bio_figure.Metrics(ctx, scores, evaluation, load.cmc) + else: + process = bio_figure.Metrics(ctx, scores, evaluation, load.split) + process.run() + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.titles_option() +@common_options.sep_dev_eval_option() +@common_options.output_plot_file_option(default_out='roc.pdf') +@common_options.eval_option() +@common_options.points_curve_option() +@common_options.semilogx_option(True) +@common_options.axes_val_option(dflt=[1e-4, 1, 1e-4, 1]) +@common_options.x_rotation_option() +@common_options.lines_at_option() +@common_options.x_label_option() +@common_options.y_label_option() +@common_options.const_layout_option() +@common_options.style_option() +@common_options.figsize_option() +@verbosity_option() +@click.pass_context +def roc(ctx, scores, evaluation, **kargs): + """Plot ROC (receiver operating characteristic) curve: + The plot will represent the false match rate on the horizontal axis and the + false non match rate on the vertical axis. The values for the axis will be + computed using :py:func:`bob.measure.roc`. + + You need to provide one or more development score file(s) for each experiment. + You can also provide eval files along with dev files. If only dev-scores + are used, the flag `--no-evaluation` must be used. + is required in that case. Files must be 4- or 5- columns format, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. + + Examples: + $ bob bio roc dev-scores + + $ bob bio roc dev-scores1 eval-scores1 dev-scores2 + eval-scores2 + + $ bob bio roc -o my_roc.pdf dev-scores1 eval-scores1 + """ + process = bio_figure.Roc(ctx, scores, evaluation, load.split) + process.run() + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.output_plot_file_option(default_out='det.pdf') +@common_options.titles_option() +@common_options.x_label_option() +@common_options.y_label_option() +@common_options.sep_dev_eval_option() +@common_options.eval_option() +@common_options.axes_val_option(dflt=[0.01, 95, 0.01, 95]) +@common_options.x_rotation_option(dflt=45) +@common_options.points_curve_option() +@common_options.const_layout_option() +@common_options.style_option() +@common_options.figsize_option() +@common_options.lines_at_option() +@verbosity_option() +@click.pass_context +def det(ctx, scores, evaluation, **kargs): + """Plot DET (detection error trade-off) curve: + modified ROC curve which plots error rates on both axes + (false positives on the x-axis and false negatives on the y-axis) + + You need to provide one or more development score file(s) for each experiment. + You can also provide eval files along with dev files. If only dev-scores + are used, the flag `--no-evaluation` must be used. + is required in that case. Files must be 4- or 5- columns format, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. 
+ + Examples: + $ bob bio det dev-scores + + $ bob bio det dev-scores1 eval-scores1 dev-scores2 + eval-scores2 + + $ bob bio det -o my_det.pdf dev-scores1 eval-scores1 + """ + process = bio_figure.Det(ctx, scores, evaluation, load.split) + process.run() + +@click.command() +@common_options.scores_argument(min_arg=1, force_eval=True, nargs=-1) +@common_options.output_plot_file_option(default_out='epc.pdf') +@common_options.titles_option() +@common_options.points_curve_option() +@common_options.const_layout_option() +@common_options.style_option() +@common_options.figsize_option() +@verbosity_option() +@click.pass_context +def epc(ctx, scores, **kargs): + """Plot EPC (expected performance curve): + plots the error rate on the eval set depending on a threshold selected + a-priori on the development set and accounts for varying relative cost + in [0; 1] of FPR and FNR when calculating the threshold. + + You need to provide one or more development score and eval file(s) + for each experiment. Files must be 4- or 5- columns format, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. + + Examples: + $ bob bio epc dev-scores eval-scores + + $ bob bio epc -o my_epc.pdf dev-scores1 eval-scores1 + """ + process = measure_figure.Epc(ctx, scores, True, load.split) + process.run() + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.titles_option() +@common_options.sep_dev_eval_option() +@common_options.output_plot_file_option(default_out='cmc.pdf') +@common_options.eval_option() +@common_options.semilogx_option(True) +@common_options.axes_val_option(dflt=None) +@common_options.x_rotation_option() +@common_options.const_layout_option() +@common_options.style_option() +@common_options.figsize_option() +@verbosity_option() +@click.pass_context +def cmc(ctx, scores, evaluation, **kargs): + """Plot CMC (cumulative match characteristic curve): + graphical presentation of results of an identification task eval, + plotting rank values on the x-axis and the probability of correct identification + at or below that rank on the y-axis. The values for the axis will be + computed using :py:func:`bob.measure.cmc`. + + You need to provide one or more development score file(s) for each experiment. + You can also provide eval files along with dev files. If only dev-scores + are used, the flag `--no-evaluation` must be used. + is required in that case. Files must be 4- or 5- columns format, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. 
+ + + Examples: + $ bob bio cmc dev-scores + + $ bob bio cmc dev-scores1 eval-scores1 dev-scores2 + eval-scores2 + + $ bob bio cmc -o my_roc.pdf dev-scores1 eval-scores1 + """ + process = bio_figure.Cmc(ctx, scores, evaluation, load.cmc) + process.run() + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.titles_option() +@common_options.sep_dev_eval_option() +@common_options.output_plot_file_option(default_out='cmc.pdf') +@common_options.eval_option() +@common_options.semilogx_option(True) +@common_options.axes_val_option(dflt=None) +@common_options.x_rotation_option() +@rank_option() +@common_options.const_layout_option() +@common_options.style_option() +@common_options.figsize_option() +@verbosity_option() +@click.pass_context +def dic(ctx, scores, evaluation, **kargs): + """Plots the Detection & Identification curve over the FAR + + This curve is designed to be used in an open set identification protocol, and + defined in Chapter 14.1 of [LiJain2005]_. It requires to have at least one + open set probe item, i.e., with no corresponding gallery, such that the + positives for that pair are ``None``. + + The detection and identification curve first computes FAR thresholds based on + the out-of-set probe scores (negative scores). For each probe item, the + **maximum** negative score is used. Then, it plots the detection and + identification rates for those thresholds, which are based on the in-set + probe scores only. See [LiJain2005]_ for more details. + + .. [LiJain2005] **Stan Li and Anil K. Jain**, *Handbook of Face Recognition*, Springer, 2005 + + You need to provide one or more development score file(s) for each experiment. + You can also provide eval files along with dev files. If only dev-scores + are used, the flag `--no-evaluation` must be used. + is required in that case. Files must be 4- or 5- columns format, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. + + Examples: + $ bob bio dic dev-scores + + $ bob bio dic dev-scores1 eval-scores1 dev-scores2 + eval-scores2 + + $ bob bio dic -o my_roc.pdf dev-scores1 eval-scores1 + """ + process = bio_figure.Dic(ctx, scores, evaluation, load.cmc) + process.run() + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.output_plot_file_option(default_out='hist.pdf') +@common_options.eval_option() +@common_options.n_bins_option() +@common_options.criterion_option() +@common_options.thresholds_option() +@common_options.const_layout_option() +@common_options.show_dev_option() +@common_options.print_filenames_option() +@common_options.titles_option() +@common_options.style_option() +@common_options.figsize_option() +@verbosity_option() +@click.pass_context +def hist(ctx, scores, evaluation, **kwargs): + """ Plots histograms of positive and negatives along with threshold + criterion. + + You need to provide one or more development score file(s) for each experiment. + You can also provide eval files along with dev files. If only dev-scores + are used, the flag `--no-evaluation` must be used. + is required in that case. Files must be 4- or 5- columns format, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. + + By default, when eval-scores are given, only eval-scores histograms are + displayed with threshold line + computed from dev-scores. If you want to display dev-scores distributions + as well, use ``--show-dev`` option. 
+ + Examples: + $ bob bio hist dev-scores + + $ bob bio hist dev-scores1 eval-scores1 dev-scores2 + eval-scores2 + + $ bob bio hist --criter hter --show-dev dev-scores1 eval-scores1 + """ + process = bio_figure.Hist(ctx, scores, evaluation, load.split) + process.run() + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.titles_option() +@common_options.sep_dev_eval_option() +@common_options.table_option() +@common_options.eval_option() +@common_options.output_log_metric_option() +@common_options.output_plot_file_option(default_out='eval_plots.pdf') +@common_options.points_curve_option() +@common_options.lines_at_option() +@common_options.cost_option() +@rank_option() +@common_options.far_option() +@common_options.const_layout_option() +@common_options.style_option() +@common_options.figsize_option() +@verbosity_option() +@click.pass_context +def evaluate(ctx, scores, evaluation, **kwargs): + '''Evaluates score files, runs error analysis on score sets and plots curves. + + \b + 1. Computes the threshold using either EER, min. HTER or FAR value + criteria on development set scores + 2. Applies the above threshold on eval set scores to compute the HTER, if an + eval-score set is provided + 3. Computes Cllr, minCllr and minDCF + 4. Reports error metrics in the console or in a log file + 5. Plots ROC, EPC, DET and score distribution + curves to a multi-page PDF file + + You need to provide one or more development score file(s) for each experiment. + You can also provide eval files along with dev files. If only dev-scores + are used, the flag `--no-evaluation` must be used. + Files must be in 4- or 5-column format, see + :py:func:`bob.bio.base.score.load.four_column` and + :py:func:`bob.bio.base.score.load.five_column` for details. + + You need to provide 2 score files for each biometric system in this order: + + \b + * development scores + * evaluation scores + + Examples: + $ bob bio evaluate dev-scores + + $ bob bio evaluate -l metrics.txt -o my_plots.pdf dev-scores eval-scores + + $ bob bio evaluate -o my_plots.pdf /path/to/syst-{1,2,3}/{dev,eval}-scores + ''' + log_str = '' + if 'log' in ctx.meta and ctx.meta['log'] is not None: + log_str = ' %s' % ctx.meta['log'] + + # the first time, overwrite any existing log file + ctx.meta['open_mode'] = 'w' + click.echo("Computing metrics with EER%s..." % log_str) + ctx.meta['criter'] = 'eer' # no criterion passed to evaluate + ctx.invoke(metrics, scores=scores, evaluation=evaluation) + # afterwards, append to the log file + ctx.meta['open_mode'] = 'a' + click.echo("Computing metrics with HTER%s..." % log_str) + ctx.meta['criter'] = 'hter' # no criterion passed in evaluate + ctx.invoke(metrics, scores=scores, evaluation=evaluation) + if 'far_value' in ctx.meta and ctx.meta['far_value'] is not None: + click.echo("Computing metrics with FAR=%f%s..." %\ + (ctx.meta['far_value'], log_str)) + ctx.meta['criter'] = 'far' # no criterion passed in evaluate + ctx.invoke(metrics, scores=scores, evaluation=evaluation) + + click.echo("Computing minDCF%s..." % log_str) + ctx.meta['criter'] = 'mindcf' # no criterion passed in evaluate + ctx.invoke(metrics, scores=scores, evaluation=evaluation) + + click.echo("Computing Cllr and minCllr%s..." 
% log_str) + ctx.meta['criter'] = 'cllr' # no criterion passed in evaluate + ctx.invoke(metrics, scores=scores, evaluation=evaluation) + + # avoid closing pdf file before all figures are plotted + ctx.meta['closef'] = False + + if evaluation: + click.echo("Starting evaluate with dev and eval scores...") + else: + click.echo("Starting evaluate with dev scores only...") + + click.echo("Generating ROC in %s..." % ctx.meta['output']) + ctx.forward(roc) # use class defaults plot settings + + click.echo("Generating DET in %s..." % ctx.meta['output']) + ctx.forward(det) # use class defaults plot settings + + if evaluation: + click.echo("Generating EPC in %s..." % ctx.meta['output']) + ctx.forward(epc) # use class defaults plot settings + + # the last one closes the file + ctx.meta['closef'] = True + click.echo("Generating score histograms in %s..." % ctx.meta['output']) + ctx.meta['criter'] = 'hter' # no criterion passed in evaluate + ctx.forward(hist) + + click.echo("Evaluate successfully completed!") diff --git a/bob/bio/base/script/evaluate.py b/bob/bio/base/script/evaluate.py index 8d285f70953599bb36cb1eed02e8cee5a046d6c7..5ec3b2467b111886272565a0b53ead079b3937cc 100644 --- a/bob/bio/base/script/evaluate.py +++ b/bob/bio/base/script/evaluate.py @@ -25,6 +25,7 @@ import bob.measure from .. import score + if not os.environ.get('BOB_NO_STYLE_CHANGES'): # make the fig size smaller so that everything becomes bigger matplotlib.rc('figure', figsize=(4, 3)) @@ -369,7 +370,11 @@ def main(command_line_parameters=None): # create a multi-page PDF for the ROC curve pdf = PdfPages(args.roc) # create a separate figure for dev and eval - pdf.savefig(_plot_roc(frrs_dev, colors, args.legends, args.title[0] if args.title is not None else "ROC for development set", args.legend_font_size, args.legend_position, args.far_line_at, min_far=args.min_far_value), bbox_inches='tight') + pdf.savefig(_plot_roc( + frrs_dev, colors, args.legends, + args.title[0] if args.title is not None else "ROC for development set", + args.legend_font_size, args.legend_position, args.far_line_at, + min_far=args.min_far_value), bbox_inches='tight') del frrs_dev if args.eval_files: if args.far_line_at is not None: diff --git a/bob/bio/base/script/figure.py b/bob/bio/base/script/figure.py new file mode 100644 index 0000000000000000000000000000000000000000..46aebc454c333a777c644bae5def63fe52bb4c5f --- /dev/null +++ b/bob/bio/base/script/figure.py @@ -0,0 +1,209 @@ +'''Plots and measures for bob.bio.base''' + +import click +import matplotlib.pyplot as mpl +import bob.measure.script.figure as measure_figure +import bob.measure +from bob.measure import plot +from tabulate import tabulate + +class Roc(measure_figure.Roc): + def __init__(self, ctx, scores, evaluation, func_load): + super(Roc, self).__init__(ctx, scores, evaluation, func_load) + self._x_label = 'False Match Rate' if 'x_label' not in ctx.meta or \ + ctx.meta['x_label'] is None else ctx.meta['x_label'] + self._y_label = '1 - False Non Match Rate' if 'y_label' not in \ + ctx.meta or ctx.meta['y_label'] is None else ctx.meta['y_label'] + +class Det(measure_figure.Det): + def __init__(self, ctx, scores, evaluation, func_load): + super(Det, self).__init__(ctx, scores, evaluation, func_load) + self._x_label = 'False Match Rate' if 'x_label' not in ctx.meta or \ + ctx.meta['x_label'] is None else ctx.meta['x_label'] + self._y_label = 'False Non Match Rate' if 'y_label' not in ctx.meta or\ + ctx.meta['y_label'] is None else ctx.meta['y_label'] + +class Cmc(measure_figure.PlotBase): + ''' 
Handles the plotting of Cmc ''' + def __init__(self, ctx, scores, evaluation, func_load): + super(Cmc, self).__init__(ctx, scores, evaluation, func_load) + self._semilogx = True if 'semilogx' not in ctx.meta else\ + ctx.meta['semilogx'] + self._title = self._title or 'CMC' + self._x_label = self._x_label or 'Rank' + self._y_label = self._y_label or 'Identification rate' + self._max_R = 0 + + def compute(self, idx, input_scores, input_names): + ''' Plot CMC for dev and eval data using + :py:func:`bob.measure.plot.cmc`''' + mpl.figure(1) + if self._eval: + linestyle = '-' if not self._split else measure_figure.LINESTYLES[idx % 14] + rank = plot.cmc( + input_scores[0], logx=self._semilogx, + color=self._colors[idx], linestyle=linestyle, + label=self._label('development', input_names[0], idx) + ) + self._max_R = max(rank, self._max_R) + linestyle = '--' + if self._split: + mpl.figure(2) + linestyle = measure_figure.LINESTYLES[idx % 14] + + rank = plot.cmc( + input_scores[1], logx=self._semilogx, + color=self._colors[idx], linestyle=linestyle, + label=self._label('eval', input_names[1], idx) + ) + self._max_R = max(rank, self._max_R) + else: + rank = plot.cmc( + input_scores[0], logx=self._semilogx, + color=self._colors[idx], linestyle=measure_figure.LINESTYLES[idx % 14], + label=self._label('development', input_names[0], idx) + ) + self._max_R = max(rank, self._max_R) + +class Dic(measure_figure.PlotBase): + ''' Handles the plotting of DIC''' + def __init__(self, ctx, scores, evaluation, func_load): + super(Dic, self).__init__(ctx, scores, evaluation, func_load) + self._semilogx = True if 'semilogx' not in ctx.meta else\ + ctx.meta['semilogx'] + self._rank = 1 if 'rank' not in ctx.meta else ctx.meta['rank'] + self._title = self._title or 'DIC' + self._x_label = self._title or 'FAR' + self._y_label = self._title or 'DIR' + + def compute(self, idx, input_scores, input_names): + ''' Plot DIC for dev and eval data using + :py:func:`bob.measure.plot.detection_identification_curve`''' + mpl.figure(1) + if self._eval: + linestyle = '-' if not self._split else measure_figure.LINESTYLES[idx % 14] + plot.detection_identification_curve( + input_scores[0], rank=self._rank, logx=self._semilogx, + color=self._colors[idx], linestyle=linestyle, + label=self._label('development', input_names[0], idx) + ) + linestyle = '--' + if self._split: + mpl.figure(2) + linestyle = measure_figure.LINESTYLES[idx % 14] + + plot.detection_identification_curve( + input_scores[1], rank=self._rank, logx=self._semilogx, + color=self._colors[idx], linestyle=linestyle, + label=self._label('eval', input_names[1], idx) + ) + else: + plot.detection_identification_curve( + input_scores[0], rank=self._rank, logx=self._semilogx, + color=self._colors[idx], linestyle=measure_figure.LINESTYLES[idx % 14], + label=self._label('development', input_names[0], idx) + ) + +class Metrics(measure_figure.Metrics): + ''' Compute metrics from score files''' + def init_process(self): + if self._criter == 'rr': + self._thres = [None] * self.n_systems if self._thres is None else \ + self._thres + + def compute(self, idx, input_scores, input_names): + ''' Compute metrics for the given criteria''' + title = self._titles[idx] if self._titles is not None else None + headers = ['' or title, 'Development %s' % input_names[0]] + if self._eval and input_scores[1] is not None: + headers.append('eval % s' % input_names[1]) + if self._criter == 'rr': + rr = bob.measure.recognition_rate(input_scores[0], self._thres[idx]) + dev_rr = "%.1f%%" % (100 * rr) + raws = 
[['RR', dev_rr]] + if self._eval and input_scores[1] is not None: + rr = bob.measure.recognition_rate(input_scores[1], self._thres[idx]) + eval_rr = "%.1f%%" % (100 * rr) + raws[0].append(eval_rr) + click.echo( + tabulate(raws, headers, self._tablefmt), file=self.log_file + ) + elif self._criter == 'mindcf': + if 'cost' in self._ctx.meta: + cost = 0.99 if 'cost' not in self._ctx.meta else\ + self._ctx.meta['cost'] + threshold = bob.measure.min_weighted_error_rate_threshold( + input_scores[0][0], input_scores[0][1], cost + ) if self._thres is None else self._thres[idx] + if self._thres is None: + click.echo( + "[minDCF - Cost:%f] Threshold on Development set `%s`: %e"\ + % (cost, input_names[0], threshold), + file=self.log_file + ) + else: + click.echo( + "[minDCF] User defined Threshold: %e" % threshold, + file=self.log_file + ) + # apply threshold to development set + far, frr = bob.measure.farfrr( + input_scores[0][0], input_scores[0][1], threshold + ) + dev_far_str = "%.1f%%" % (100 * far) + dev_frr_str = "%.1f%%" % (100 * frr) + dev_mindcf_str = "%.1f%%" % ((cost * far + (1 - cost) * frr) * 100.) + raws = [['FAR', dev_far_str], + ['FRR', dev_frr_str], + ['minDCF', dev_mindcf_str]] + if self._eval and input_scores[1] is not None: + # apply threshold to development set + far, frr = bob.measure.farfrr( + input_scores[1][0], input_scores[1][1], threshold + ) + eval_far_str = "%.1f%%" % (100 * far) + eval_frr_str = "%.1f%%" % (100 * frr) + eval_mindcf_str = "%.1f%%" % ((cost * far + (1 - cost) * frr) * 100.) + raws[0].append(eval_far_str) + raws[1].append(eval_frr_str) + raws[2].append(eval_mindcf_str) + click.echo( + tabulate(raws, headers, self._tablefmt), file=self.log_file + ) + elif self._criter == 'cllr': + cllr = bob.measure.calibration.cllr(input_scores[0][0], + input_scores[0][1]) + min_cllr = bob.measure.calibration.min_cllr( + input_scores[0][0], input_scores[0][1] + ) + dev_cllr_str = "%.1f%%" % cllr + dev_min_cllr_str = "%.1f%%" % min_cllr + raws = [['Cllr', dev_cllr_str], + ['minCllr', dev_min_cllr_str]] + if self._eval and input_scores[1] is not None: + cllr = bob.measure.calibration.cllr(input_scores[1][0], + input_scores[1][1]) + min_cllr = bob.measure.calibration.min_cllr( + input_scores[1][0], input_scores[1][1] + ) + eval_cllr_str = "%.1f%%" % cllr + eval_min_cllr_str = "%.1f%%" % min_cllr + raws[0].append(eval_cllr_str) + raws[1].append(eval_min_cllr_str) + click.echo( + tabulate(raws, headers, self._tablefmt), file=self.log_file + ) + else: + super(Metrics, self).compute(idx, input_scores, input_names) + +class Hist(measure_figure.Hist): + ''' Histograms for biometric scores ''' + + def _setup_hist(self, neg, pos): + self._title_base = 'Bio scores' + self._density_hist( + pos[0], label='Genuines', alpha=0.9, color='C2' + ) + self._density_hist( + neg[0], label='Zero-effort impostors', alpha=0.8, color='C0' + ) diff --git a/bob/bio/base/script/gen.py b/bob/bio/base/script/gen.py new file mode 100644 index 0000000000000000000000000000000000000000..5cc3311f844020bce8efa9fe2aa8411383bd9db3 --- /dev/null +++ b/bob/bio/base/script/gen.py @@ -0,0 +1,99 @@ +"""Generate random scores. 
+""" +import pkg_resources # to make sure bob gets imported properly +import os +import logging +import numpy +import random as rd +import click +from click.types import FLOAT +from bob.extension.scripts.click_helper import verbosity_option +from bob.core import random +from bob.io.base import create_directories_safe + +logger = logging.getLogger(__name__) + +NUM_NEG = 5000 +NUM_POS = 5000 + +def gen_score_distr(mean_neg, mean_pos, sigma_neg=10, sigma_pos=10): + """Generate scores from normal distributions + + Parameters + ---------- + mean_neg : float + Mean for negative scores + mean_pos : float + Mean for positive scores + sigma_neg : float + STDev for negative scores + sigma_pos : float + STDev for positive scores + + Returns + ------- + neg_scores : :any:`list` + Negatives scores + pos_scores : :any:`list` + Positive scores + """ + mt = random.mt19937() # initialise the random number generator + + neg_generator = random.normal(numpy.float32, mean_neg, sigma_neg) + pos_generator = random.normal(numpy.float32, mean_pos, sigma_pos) + + neg_scores = [neg_generator(mt) for _ in range(NUM_NEG)] + pos_scores = [pos_generator(mt) for _ in range(NUM_NEG)] + + return neg_scores, pos_scores + +def write_scores_to_file(pos, neg, filename, n_sys=1, five_col=False): + """ Writes score distributions + + Parameters + ---------- + pos : :py:class:`numpy.ndarray` + Scores for positive samples. + neg : :py:class:`numpy.ndarray` + Scores for negative samples. + filename : str + The path to write the score to. + n_sys : int + Number of different systems + five_col : bool + If 5-colum format, else 4-column + """ + create_directories_safe(os.path.dirname(filename)) + s_names = ['s%d' % i for i in range(n_sys)] + with open(filename, 'wt') as f: + for i in pos: + s_name = rd.choice(s_names) + s_five = ' ' if not five_col else ' d' + rd.choice(s_names) + ' ' + f.write('x%sx %s %f\n' % (s_five, s_name, i)) + for i in neg: + s_name = rd.choice(s_names) + s_five = ' ' if not five_col else ' d' + rd.choice(s_names) + ' ' + f.write('x%sy %s %f\n' % (s_five, s_name, i)) + +@click.command() +@click.argument('outdir') +@click.option('-mm', '--mean-match', default=10, type=FLOAT, show_default=True) +@click.option('-mnm', '--mean-non-match', default=-10, type=FLOAT, show_default=True) +@click.option('-n', '--n-sys', default=1, type=click.INT, show_default=True) +@click.option('--five-col/--four-col', default=False, show_default=True) +@verbosity_option() +def gen(outdir, mean_match, mean_non_match, n_sys, five_col): + """Generate random scores. + Generates random scores in 4col or 5col format. The scores are generated + using Gaussian distribution whose mean is an input + parameter. The generated scores can be used as hypothetical datasets. 
+ """ + # Generate the data + neg_dev, pos_dev = gen_score_distr(mean_non_match, mean_match) + neg_eval, pos_eval = gen_score_distr(mean_non_match, mean_match) + + # Write the data into files + write_scores_to_file(neg_dev, pos_dev, + os.path.join(outdir, 'scores-dev'), n_sys, five_col) + write_scores_to_file(neg_eval, pos_eval, + os.path.join(outdir, 'scores-eval'), n_sys, five_col) diff --git a/bob/bio/base/test/test_commands.py b/bob/bio/base/test/test_commands.py new file mode 100644 index 0000000000000000000000000000000000000000..c8706072814a4571ef3c984d00bc77baf844db37 --- /dev/null +++ b/bob/bio/base/test/test_commands.py @@ -0,0 +1,271 @@ +'''Tests for bob.measure scripts''' + +import sys +import filecmp +import click +from click.testing import CliRunner +import pkg_resources +from ..script import commands + +def test_metrics(): + dev1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-4col.txt') + runner = CliRunner() + result = runner.invoke(commands.metrics, ['--no-evaluation', dev1]) + with runner.isolated_filesystem(): + with open('tmp', 'w') as f: + f.write(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + dev2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-5col.txt') + test1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-4col.txt') + test2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-5col.txt') + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, [dev1, test1, dev2, test2] + ) + with open('tmp', 'w') as f: + f.write(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, ['-l', 'tmp', '-ts', 'A,B', + dev1, test1, dev2, test2] + ) + assert result.exit_code == 0, (result.exit_code, result.output) + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, ['-l', 'tmp', dev1, test2] + ) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, ['-l', 'tmp', '-T', '0.1', + '--criter', 'mindcf', '--cost', 0.9, + dev1, test2] + ) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, ['--no-evaluation', '-l', 'tmp', + '--criter', 'mindcf', '--cost', 0.9, + dev1] + ) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, ['--criter', 'cllr', dev1, test2] + ) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, ['--no-evaluation', '-l', 'tmp', '--criter', 'cllr', + '--cost', 0.9, dev1] + ) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, ['--criter', 'rr', '-T', + '0.1', dev1, test2] + ) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke( + commands.metrics, ['--no-evaluation', '-l', 'tmp', '--criter', 'rr', + dev1, dev2] + ) + assert result.exit_code == 0, (result.exit_code, result.output) + + +def test_roc(): + dev1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-4col.txt') + runner = CliRunner() + with runner.isolated_filesystem(): + 
result = runner.invoke(commands.roc, ['--no-evaluation', '--output', + 'test.pdf',dev1]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + dev2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-5col.txt') + test1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-4col.txt') + test2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-5col.txt') + with runner.isolated_filesystem(): + result = runner.invoke(commands.roc, ['--split', '--output', + 'test.pdf', + dev1, test1, dev2, test2]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke(commands.roc, ['--output', + 'test.pdf', '--titles', 'A,B', + dev1, test1, dev2, test2]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + + +def test_det(): + dev1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-4col.txt') + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(commands.det, ['--no-evaluation', dev1]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + dev2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-5col.txt') + test1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-4col.txt') + test2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-5col.txt') + with runner.isolated_filesystem(): + result = runner.invoke(commands.det, ['--split', '--output', + 'test.pdf', '--titles', 'A,B', + dev1, test1, dev2, test2]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + with runner.isolated_filesystem(): + result = runner.invoke(commands.det, ['--output', + 'test.pdf', + dev1, test1, dev2, test2]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + +def test_epc(): + dev1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-4col.txt') + test1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-4col.txt') + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(commands.epc, [dev1, test1]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + dev2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-4col.tar.gz') + test2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-5col.txt') + with runner.isolated_filesystem(): + result = runner.invoke(commands.epc, ['--output', 'test.pdf', + '--titles', 'A,B', + dev1, test1, dev2, test2]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + +def test_hist(): + dev1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-4col.txt') + dev2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-5col.txt') + test1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-4col.txt') + test2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-5col.txt') + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(commands.hist, ['--no-evaluation', dev1]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, 
(result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke(commands.hist, ['--criter', 'hter', '--output', + 'HISTO.pdf', '-b', + 30,'--no-evaluation', dev1, dev2]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + + with runner.isolated_filesystem(): + result = runner.invoke(commands.hist, ['--criter', 'eer', '--output', + 'HISTO.pdf', '-b', 30, + '-ts', 'A,B', dev1, test1, dev2, + test2]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + +def test_cmc(): + dev1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/scores-cmc-5col.txt') + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(commands.cmc, ['--no-evaluation', dev1]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + test1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/scores-cmc-4col.txt') + with runner.isolated_filesystem(): + result = runner.invoke(commands.cmc, ['--output', 'test.pdf', + '--titles', 'A,B', + dev1, test1, dev1, test1]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + +def test_dic(): + dev1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/scores-nonorm-openset-dev') + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(commands.dic, ['--no-evaluation', dev1, '--rank', 2]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + test1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/scores-nonorm-openset-dev') + with runner.isolated_filesystem(): + result = runner.invoke(commands.dic, ['--output', 'test.pdf', + '--titles', 'A,B', + dev1, test1, dev1, test1]) + if result.output: + click.echo(result.output) + assert result.exit_code == 0, (result.exit_code, result.output) + +def test_evaluate(): + dev1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-4col.txt') + dev2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/dev-5col.txt') + + test1 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-4col.txt') + test2 = pkg_resources.resource_filename('bob.bio.base.test', + 'data/test-5col.txt') + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(commands.evaluate, ['-l', 'tmp', '-f', 0.03, + '--no-evaluation', dev1, dev2]) + assert result.exit_code == 0, (result.exit_code, result.output) + result = runner.invoke(commands.evaluate, ['--no-evaluation', '-f', 0.02, + dev1, dev2]) + assert result.exit_code == 0, (result.exit_code, result.output) + + result = runner.invoke(commands.evaluate, ['-l', 'tmp', '-f', 0.04, + dev1, test1, dev2, test2]) + assert result.exit_code == 0, (result.exit_code, result.output) + result = runner.invoke(commands.evaluate, ['-f', 0.01, + dev1, test1, dev2, test2]) + assert result.exit_code == 0, (result.exit_code, result.output) + diff --git a/doc/experiments.rst b/doc/experiments.rst index b6dc2190c7aa0a102ec87c1154eead637fa390b8..16bd85893848dd28fe4e8de0a8cf94dec2152098 100644 --- a/doc/experiments.rst +++ b/doc/experiments.rst @@ -154,26 +154,121 @@ However, to be consistent, throughout this documentation we document the options Evaluating Experiments ---------------------- -After the experiment has finished successfully, one or more text 
file containing all the scores are written. -To evaluate the experiment, you can use the generic ``evaluate.py`` script, which has properties for all prevalent evaluation types, such as CMC, DIR, ROC and DET plots, as well as computing recognition rates, EER/HTER, Cllr and minDCF. -Additionally, a combination of different algorithms can be plotted into the same files. -Just specify all the score files that you want to evaluate using the ``--dev-files`` option, and possible legends for the plots (in the same order) using the ``--legends`` option, and the according plots will be generated. -For example, to create a ROC curve for the experiment above, use: +After the experiment has finished successfully, one or more text files containing +all the scores are written. This section presents commands that help to quickly +evaluate a set of scores by generating metrics or plots. +The scripts take as input either a 4-column or 5-column data format as specified +in the documentation of :py:func:`bob.bio.base.score.load.four_column` or +:py:func:`bob.bio.base.score.load.five_column`. + +Metrics +======= + +To calculate the threshold using a certain criterion (EER (default), FAR or +min. HTER) on a development set and apply it to an evaluation set, just do: + +.. code-block:: sh + + $ bob bio metrics {dev,test}-4col.txt --titles ExpA --criter hter + + [Min. criterion: HTER ] Threshold on Development set `ExpA`: -4.830500e-03 + ====== ====================== ================= + ExpA Development dev-4col Eval. test-4col + ====== ====================== ================= + FtA 0.0% 0.0% + FMR 6.7% (35/520) 2.5% (13/520) + FNMR 6.7% (26/390) 6.2% (24/390) + FAR 6.7% 2.5% + FRR 6.7% 6.2% + HTER 6.7% 4.3% + ====== ====================== ================= + +.. note:: + You can compute the analysis on development set(s) only by passing the + ``--no-evaluation`` option. See ``metrics --help`` for further options. + +You can also compute measures such as recognition rate (``rr``), Cllr and +minCllr (``cllr``) and minDCF (``mindcf``) by passing the corresponding option. +For example: + +.. code-block:: sh + + $ bob bio metrics {dev,test}-4col.txt --titles ExpA --criter cllr + + Computing Cllr and minCllr... + ======= ====================== ================ + None Development dev-4col eval test-4col + ======= ====================== ================ + Cllr 0.9% 0.9% + minCllr 0.2% 0.2% + ======= ====================== ================ + +.. note:: + You must provide files in the correct format depending on the measure you + want to compute. For example, the recognition rate requires CMC-type files. See + :py:func:`bob.bio.base.score.load.cmc`. + +Plots +===== + +Customizable plotting commands are available in the :py:mod:`bob.bio.base` module. +They take a list of development and/or evaluation files and generate a single PDF +file containing the plots. Available plots are: + +* ``roc`` (receiver operating characteristic) + +* ``det`` (detection error trade-off) + +* ``epc`` (expected performance curve) + +* ``hist`` (histograms of scores with threshold line) + +* ``cmc`` (cumulative match characteristic curve) + +* ``dic`` (detection & identification curve) + +Use the ``--help`` option on the above-cited commands to find out about further +options. + + +For example, to generate a CMC curve from development and evaluation datasets: ..
code-block:: sh - $ evaluate.py --dev-files results/pca-experiment/male/nonorm/scores-dev --legend MOBIO --roc MOBIO_MALE_ROC.pdf -vv + $ bob bio cmc --output 'my_cmc.pdf' dev-1.txt eval-1.txt + dev-2.txt eval-2.txt -Please note that there exists another file called ``Experiment.info`` inside the result directory. -This file is a pure text file and contains the complete configuration of the experiment. -With this configuration it is possible to inspect all default parameters of the algorithms, and even to re-run the exact same experiment. +where `my_cmc.pdf` will contain CMC curves for the two experiments. +.. note:: + By default, ``det``, ``roc``, ``cmc`` and ``dic`` plot development and + evaluation curves in + separate plots. You can force everything into the same plot using + the ``--no-split`` option. + +Evaluate +======== + +A convenient command `evaluate` is provided to generate multiple metrics and +plots for a list of experiments. It generates `metrics` outputs for the EER, +HTER, Cllr and minDCF criteria, along with `roc`, `det`, `epc` and `hist` plots for each +experiment. For example: + +.. code-block:: sh + + $ bob bio evaluate -l 'my_metrics.txt' -o 'my_plots.pdf' {sys1,sys2}/ + {dev,eval} + +will output metrics and plots for the two experiments (dev and eval pairs) in +`my_metrics.txt` and `my_plots.pdf`, respectively. .. _running_in_parallel: Running in Parallel ------------------- + One important property of the ``verify.py`` script is that it can run in parallel, using either several processes on the local machine, or an SGE grid. To achieve that, ``bob.bio`` is well-integrated with our SGE grid toolkit GridTK_, which we have selected as a python package in the :ref:`Installation <bob.bio.base.installation>` section. The ``verify.py`` script can submit jobs either to the SGE grid, or to a local scheduler, keeping track of dependencies between the jobs. @@ -210,7 +305,7 @@ One set of command line options change the directory structure of the output. By default, intermediate (temporary) files are by default written to the ``temp`` directory, which can be overridden by the ``temp_directory`` variable, which expects relative or absolute paths. Re-using Parts of Experiments -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +============================= If you want to re-use parts previous experiments, you can specify the directories (which are relative to the ``temp_directory``, but you can also specify absolute paths): * ``preprocessed_directory`` @@ -245,7 +340,7 @@ This option is particularly useful for debugging purposes. Database-dependent Variables -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +============================ Many databases define several protocols that can be executed. To change the protocol, you can either modify the configuration file, or simply use the ``protocol`` variable. @@ -264,13 +359,13 @@ Other Variables --------------- Calibration -~~~~~~~~~~~ +=========== For some applications it is interesting to get calibrated scores. Simply set the variable ``calibrate_scores = True`` and another set of score files will be created by training the score calibration on the scores of the ``'dev'`` group and execute it to all available groups. The scores will be located at the same directory as the **nonorm** and **ztnorm** scores, and the file names are **calibrated-dev** (and **calibrated-eval** if applicable). 
Unsuccessful Preprocessing -~~~~~~~~~~~~~~~~~~~~~~~~~~ +========================== In some cases, the preprocessor is not able to preprocess the data (e.g., for face image processing the face detector might not detect the face). If you expect such cases to happen, you might want to use the ``allow_missing_files`` variable. When this variable is set to ``True``, missing files will be handled correctly throughout the whole processing chain, i.e.: diff --git a/doc/py_api.rst b/doc/py_api.rst index 4d52687cbea4d63cf1e3855f9095f9f975cec249..d89f63dca0b70310f3cd2ed90f6a76e4742ba92a 100644 --- a/doc/py_api.rst +++ b/doc/py_api.rst @@ -93,6 +93,7 @@ Scoring bob.bio.base.tools.compute_scores bob.bio.base.tools.concatenate bob.bio.base.tools.calibrate + bob.bio.base.script.figure.Metrics Loading data ------------ @@ -108,6 +109,13 @@ Loading data bob.bio.base.score.load.split_five_column bob.bio.base.score.load.cmc_five_column +Plotting +-------- +.. autosummary:: + bob.bio.base.script.figure.Cmc + bob.bio.base.script.figure.Dic + bob.bio.base.script.figure.Hist + OpenBR conversions ------------------ .. autosummary:: @@ -130,4 +138,8 @@ Details .. automodule:: bob.bio.base.score.load .. automodule:: bob.bio.base.score.openbr +.. automodule:: bob.bio.base.script.figure +.. automodule:: bob.bio.base.script.commands +.. automodule:: bob.bio.base.script.gen + .. include:: links.rst diff --git a/setup.py b/setup.py index d33d06d3c71f8cf11d37a92bb4181b464c3adf73..042c6336bab4bf3a9b5bc08452b6a656cec0d94e 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,6 @@ setup( 'verify.py = bob.bio.base.script.verify:main', 'resources.py = bob.bio.base.script.resources:resources', 'databases.py = bob.bio.base.script.resources:databases', - 'evaluate.py = bob.bio.base.script.evaluate:main', 'collect_results.py = bob.bio.base.script.collect_results:main', 'grid_search.py = bob.bio.base.script.grid_search:main', 'preprocess.py = bob.bio.base.script.preprocess:main', @@ -139,6 +138,15 @@ setup( # bob bio scripts 'bob.bio.cli': [ 'annotate = bob.bio.base.script.annotate:annotate', + 'metrics = bob.bio.base.script.commands:metrics', + 'roc = bob.bio.base.script.commands:roc', + 'det = bob.bio.base.script.commands:det', + 'epc = bob.bio.base.script.commands:epc', + 'hist = bob.bio.base.script.commands:hist', + 'cmc = bob.bio.base.script.commands:cmc', + 'dic = bob.bio.base.script.commands:dic', + 'gen = bob.bio.base.script.gen:gen', + 'evaluate = bob.bio.base.script.commands:evaluate', ], # annotators
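
As a usage reference for the new ``load_files`` helper introduced in ``bob/bio/base/score/load.py`` above, here is a minimal sketch (not part of the patch; the score file names are placeholders, and ``load.split`` is assumed to return a ``(negatives, positives)`` pair, as it does when passed to the plotting classes above):

.. code-block:: py

    from bob.bio.base.score import load
    import bob.measure

    # hypothetical 4/5-column score files, one per system
    score_files = ['scores-sys1-dev', 'scores-sys2-dev']

    # load_files applies the given loader to every file and returns one
    # (negatives, positives) tuple per input file
    systems = load.load_files(score_files, load.split)

    for path, (neg, pos) in zip(score_files, systems):
        # threshold at the equal error rate, then report FAR/FRR at that threshold
        thres = bob.measure.eer_threshold(neg, pos)
        far, frr = bob.measure.farfrr(neg, pos, thres)
        print('%s: threshold=%g FAR=%.2f%% FRR=%.2f%%'
              % (path, thres, 100 * far, 100 * frr))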