diff --git a/bob/pad/base/script/pad_commands.py b/bob/pad/base/script/pad_commands.py
index bd0b54df7fbe9def3f6fd32bb7b8348a477c1ad3..d3877d8ac316563a1f52f8f45972abcf60507d04 100644
--- a/bob/pad/base/script/pad_commands.py
+++ b/bob/pad/base/script/pad_commands.py
@@ -88,7 +88,7 @@ def evaluate(ctx, scores, evaluation, **kwargs):
     common_options.MULTI_METRICS_HELP.format(
         names='FtA, APCER, BPCER, FAR, FRR, ACER',
         criteria=CRITERIA, score_format=SCORE_FORMAT,
-        command='bob measure multi-metrics'),
+        command='bob pad multi-metrics'),
     criteria=CRITERIA)
 def multi_metrics(ctx, scores, evaluation, protocols_number, **kwargs):
   ctx.meta['min_arg'] = protocols_number * (2 if evaluation else 1)
diff --git a/bob/pad/base/script/vuln_commands.py b/bob/pad/base/script/vuln_commands.py
index 7e93a462f2083d0c1db08fb93cb18106c63f9e9b..a08962085b1eda7a8bac28d50d71cd83b386e256 100644
--- a/bob/pad/base/script/vuln_commands.py
+++ b/bob/pad/base/script/vuln_commands.py
@@ -2,17 +2,12 @@
 """
 
 import os
-import logging
 import numpy
 import click
-import pkg_resources
-from click_plugins import with_plugins
 from click.types import FLOAT
 from bob.measure.script import common_options
-from bob.extension.scripts.click_helper import (verbosity_option,
-                                                open_file_mode_option,
-                                               bool_option,
-                                               AliasedGroup, list_float_option)
+from bob.extension.scripts.click_helper import (
+    verbosity_option, bool_option, list_float_option)
 from bob.core import random
 from bob.io.base import create_directories_safe
 from bob.bio.base.score import load
@@ -23,15 +18,15 @@ NUM_ZEIMPOSTORS = 5000
 NUM_PA = 5000
 
 
-def hlines_at_option(dflt=' ', **kwargs):
-    '''Get option to draw const FNMRlines'''
-    return list_float_option(
-        name='hlines-at', short_name='hla',
-        desc='If given, draw horizontal lines at the given axis positions. '
-        'Your values must be separated with a comma (,) without space. '
-        'This option works in ROC and DET curves.',
-        nitems=None, dflt=dflt, **kwargs
-    )
+def fnmr_at_option(dflt=' ', **kwargs):
+  '''Get option to draw constant FNMR lines'''
+  return list_float_option(
+      name='fnmr', short_name='fnmr',
+      desc='If given, draw horizontal lines at the given FNMR position. '
+      'Your values must be separated with a comma (,) without space. '
+      'This option works in ROC and DET curves.',
+      nitems=None, dflt=dflt, **kwargs
+  )
 
 
 def gen_score_distr(mean_gen, mean_zei, mean_pa, sigma_gen=1, sigma_zei=1,
@@ -49,7 +44,6 @@ def gen_score_distr(mean_gen, mean_zei, mean_pa, sigma_gen=1, sigma_zei=1,
   return genuine_scores, zei_scores, pa_scores
 
 
-
 def write_scores_to_file(neg, pos, filename, attack=False):
   """Writes score distributions into 4-column score files. For the format of
     the 4-column score files, please refer to Bob's documentation.
@@ -65,14 +59,13 @@ def write_scores_to_file(neg, pos, filename, attack=False):
   """
   create_directories_safe(os.path.dirname(filename))
   with open(filename, 'wt') as f:
-      for i in pos:
-          f.write('x x foo %f\n' % i)
-      for i in neg:
-          if attack:
-              f.write('x attack foo %f\n' % i)
-          else:
-              f.write('x y foo %f\n' % i)
-
+    for i in pos:
+      f.write('x x foo %f\n' % i)
+    for i in neg:
+      if attack:
+        f.write('x attack foo %f\n' % i)
+      else:
+        f.write('x y foo %f\n' % i)
 
 
 @click.command()
@@ -108,7 +101,6 @@ def gen(outdir, mean_gen, mean_zei, mean_pa):
                        attack=True)
 
 
-
 @click.command()
 @common_options.scores_argument(min_arg=2, nargs=-1)
 @common_options.output_plot_file_option(default_out='vuln_roc.pdf')
@@ -128,7 +120,7 @@ def gen(outdir, mean_gen, mean_zei, mean_pa):
 @click.option('--real-data/--no-real-data', default=True, show_default=True,
               help='If False, will annotate the plots hypothetically, instead '
               'of with real data values of the calculated error rates.')
-@hlines_at_option()
+@fnmr_at_option()
 @click.pass_context
 def roc(ctx, scores, real_data, **kwargs):
   """Plot ROC
@@ -167,7 +159,7 @@ def roc(ctx, scores, real_data, **kwargs):
 @click.option('--real-data/--no-real-data', default=True, show_default=True,
               help='If False, will annotate the plots hypothetically, instead '
               'of with real data values of the calculated error rates.')
-@hlines_at_option()
+@fnmr_at_option()
 @click.pass_context
 def det(ctx, scores, real_data, **kwargs):
   """Plot DET
@@ -189,7 +181,6 @@ def det(ctx, scores, real_data, **kwargs):
   process.run()
 
 
-
 @click.command()
 @common_options.scores_argument(min_arg=2, force_eval=True, nargs=-1)
 @common_options.output_plot_file_option(default_out='vuln_epc.pdf')
@@ -234,7 +225,6 @@ def epc(ctx, scores, **kwargs):
   process.run()
 
 
-
 @click.command()
 @common_options.scores_argument(min_arg=2, force_eval=True, nargs=-1)
 @common_options.output_plot_file_option(default_out='vuln_epsc.pdf')
@@ -270,43 +260,42 @@ def epc(ctx, scores, **kwargs):
 @click.pass_context
 def epsc(ctx, scores, criteria, var_param, fixed_param, three_d, sampling,
          **kwargs):
-    """Plot EPSC (expected performance spoofing curve):
-
-    You need to provide 4 score
-    files for each biometric system in this order:
+  """Plot EPSC (expected performance spoofing curve):
 
-    \b
-    * licit development scores
-    * licit evaluation scores
-    * spoof development scores
-    * spoof evaluation scores
+  You need to provide 4 score
+  files for each biometric system in this order:
 
-    See :ref:`bob.pad.base.vulnerability` in the documentation for a guide on
-    vulnerability analysis.
+  \b
+  * licit development scores
+  * licit evaluation scores
+  * spoof development scores
+  * spoof evaluation scores
 
-    Note that when using 3D plots with option ``--three-d``, you cannot plot
-    both WER and IAPMR on the same figure (which is possible in 2D).
+  See :ref:`bob.pad.base.vulnerability` in the documentation for a guide on
+  vulnerability analysis.
 
-    Examples:
-        $ bob vuln epsc -v -o my_epsc.pdf dev-scores1 eval-scores1
+  Note that when using 3D plots with option ``--three-d``, you cannot plot
+  both WER and IAPMR on the same figure (which is possible in 2D).
 
-        $ bob vuln epsc -v -D {licit,spoof}/scores-{dev,eval}
-    """
-    if three_d:
-        if (ctx.meta['wer'] and ctx.meta['iapmr']):
-            raise click.BadParameter('Cannot plot both WER and IAPMR in 3D')
-        ctx.meta['sampling'] = sampling
-        process = figure.Epsc3D(
-            ctx, scores, True, load.split,
-            criteria, var_param, fixed_param
-        )
-    else:
-        process = figure.Epsc(
-            ctx, scores, True, load.split,
-            criteria, var_param, fixed_param
-        )
-    process.run()
+  Examples:
+      $ bob vuln epsc -v -o my_epsc.pdf dev-scores1 eval-scores1
 
+      $ bob vuln epsc -v -D {licit,spoof}/scores-{dev,eval}
+  """
+  if three_d:
+    if (ctx.meta['wer'] and ctx.meta['iapmr']):
+      raise click.BadParameter('Cannot plot both WER and IAPMR in 3D')
+    ctx.meta['sampling'] = sampling
+    process = figure.Epsc3D(
+        ctx, scores, True, load.split,
+        criteria, var_param, fixed_param
+    )
+  else:
+    process = figure.Epsc(
+        ctx, scores, True, load.split,
+        criteria, var_param, fixed_param
+    )
+  process.run()
 
 
 @click.command()
@@ -366,38 +355,6 @@ def hist(ctx, scores, evaluation, **kwargs):
   process.run()
 
 
-
-@click.command(context_settings=dict(token_normalize_func=lambda x: x.lower()))
-@common_options.scores_argument(min_arg=2, force_eval=True, nargs=-1)
-@common_options.table_option()
-@common_options.criterion_option(lcriteria=['eer', 'min-hter'])
-@common_options.thresholds_option()
-@open_file_mode_option()
-@common_options.output_log_metric_option()
-@common_options.legends_option()
-@verbosity_option()
-@click.pass_context
-def metrics(ctx, scores, **kwargs):
-  """Generate table of metrics for vulnerability PAD
-
-  You need to provide 4 scores
-  files for each vuln system in this order:
-
-  \b
-  * licit development scores
-  * licit evaluation scores
-  * spoof development scores
-  * spoof evaluation scores
-
-
-  Examples:
-      $ bob vuln vuln_metrics -v {licit,spoof}/scores-{dev,eval}
-  """
-  process = figure.Metrics(ctx, scores, True, load.split)
-  process.run()
-
-
-
 @click.command()
 @common_options.scores_argument(min_arg=2, force_eval=True, nargs=-1)
 @common_options.output_plot_file_option(default_out='fmr_iapmr.pdf')
@@ -416,52 +373,10 @@ def metrics(ctx, scores, **kwargs):
 @common_options.semilogx_option()
 @click.pass_context
 def fmr_iapmr(ctx, scores, **kwargs):
-    """Plot FMR vs IAPMR
-
-    You need to provide 4 scores
-    files for each vuln system in this order:
-
-    \b
-    * licit development scores
-    * licit evaluation scores
-    * spoof development scores
-    * spoof evaluation scores
-
-    Examples:
-        $ bob vuln fmr_iapmr -v dev-scores eval-scores
+  """Plot FMR vs IAPMR
 
-        $ bob vuln fmr_iapmr -v {licit,spoof}/scores-{dev,eval}
-    """
-    process = figure.FmrIapmr(ctx, scores, True, load.split)
-    process.run()
-
-
-
-@click.command()
-@common_options.scores_argument(min_arg=2, force_eval=True, nargs=-1)
-@common_options.legends_option()
-@common_options.sep_dev_eval_option()
-@common_options.table_option()
-@common_options.output_log_metric_option()
-@common_options.output_plot_file_option(default_out='vuln_eval.pdf')
-@common_options.points_curve_option()
-@common_options.lines_at_option()
-@common_options.const_layout_option()
-@common_options.figsize_option(dflt=None)
-@common_options.style_option()
-@common_options.linestyles_option()
-@verbosity_option()
-@click.pass_context
-def evaluate(ctx, scores, **kwargs):
-  '''Runs error analysis on score sets for vulnerability studies
-
-  \b
-  1. Computes bob vuln vuln_metrics
-  2. Plots EPC, EPSC, vulnerability histograms, fmr vs IAPMR to a multi-page
-     PDF file
-
-
-  You need to provide 4 score files for each biometric system in this order:
+  You need to provide 4 score
+  files for each vuln system in this order:
 
   \b
   * licit development scores
@@ -470,31 +385,9 @@ def evaluate(ctx, scores, **kwargs):
   * spoof evaluation scores
 
   Examples:
-      $ bob vuln evaluate -o my_epsc.pdf dev-scores1 eval-scores1
+      $ bob vuln fmr_iapmr -v dev-scores eval-scores
 
-      $ bob vuln evaluate -D {licit,spoof}/scores-{dev,eval}
-  '''
-  # first time erase if existing file
-  click.echo("Computing vuln metrics...")
-  ctx.invoke(metrics, scores=scores, evaluation=True)
-  if 'log' in ctx.meta and ctx.meta['log'] is not None:
-      click.echo("[metrics] => %s" % ctx.meta['log'])
-
-  # avoid closing pdf file before all figures are plotted
-  ctx.meta['closef'] = False
-  click.echo("Computing histograms...")
-  ctx.meta['criterion'] = 'eer'  # no criterion passed in evaluate
-  ctx.forward(hist)  # use class defaults plot settings
-  click.echo("Computing DET...")
-  ctx.forward(det)  # use class defaults plot settings
-  click.echo("Computing ROC...")
-  ctx.forward(roc)  # use class defaults plot settings
-  click.echo("Computing EPC...")
-  ctx.forward(epc)  # use class defaults plot settings
-  click.echo("Computing EPSC...")
-  ctx.forward(epsc)  # use class defaults plot settings
-  click.echo("Computing FMR vs IAPMR...")
-  ctx.meta['closef'] = True
-  ctx.forward(fmr_iapmr)  # use class defaults plot settings
-  click.echo("Vuln successfully completed!")
-  click.echo("[plots] => %s" % (ctx.meta['output']))
+      $ bob vuln fmr_iapmr -v {licit,spoof}/scores-{dev,eval}
+  """
+  process = figure.FmrIapmr(ctx, scores, True, load.split)
+  process.run()
diff --git a/bob/pad/base/script/vuln_figure.py b/bob/pad/base/script/vuln_figure.py
index 828baa934c7034274b0a309c8391b4f2fe37ca7f..bcc9bb4cd65eccdc80a60a65b0b6b39bc1ccd0ec 100644
--- a/bob/pad/base/script/vuln_figure.py
+++ b/bob/pad/base/script/vuln_figure.py
@@ -1,51 +1,18 @@
 '''Runs error analysis on score sets, outputs metrics and plots'''
 
-import math
 import click
 import numpy as np
 import matplotlib.pyplot as mpl
 import bob.measure.script.figure as measure_figure
-import bob.bio.base.script.figure as bio_figure
-from tabulate import tabulate
 from bob.measure.utils import get_fta_list
 from bob.measure import (
-    frr_threshold, far_threshold, eer_threshold, min_hter_threshold, farfrr,
-    epc, ppndf, min_weighted_error_rate_threshold
+    frr_threshold, far_threshold, farfrr,
+    ppndf, min_weighted_error_rate_threshold
 )
-from bob.measure.plot import (det, det_axis, roc_for_far, log_values, epc)
+from bob.measure import plot
 from . import error_utils
 
 
-class Metrics(measure_figure.Metrics):
-    def __init__(self, ctx, scores, evaluation, func_load):
-        super(Metrics, self).__init__(ctx, scores, evaluation, func_load)
-
-    ''' Compute metrics from score files'''
-
-    def compute(self, idx, input_scores, input_names):
-        ''' Compute metrics for the given criteria'''
-        # extract pos and negative and remove NaNs
-        neg_list, pos_list, _ = get_fta_list(input_scores)
-        dev_neg, dev_pos = neg_list[0], pos_list[0]
-        criter = self._criterion or 'eer'
-        threshold = error_utils.calc_threshold(criter, dev_neg, dev_pos) \
-            if self._thres is None else self._thres[idx]
-        far, frr = farfrr(neg_list[1], pos_list[1], threshold)
-        iapmr, _ = farfrr(neg_list[3], pos_list[1], threshold)
-        title = self._legends[idx] if self._legends is not None else None
-        headers = ['' or title, '%s (threshold=%.2g)' %
-                   (criter.upper(), threshold)]
-        rows = []
-        rows.append(['FMR (%)', '{:>5.1f}%'.format(100 * far)])
-        rows.append(['FNMR (%)', '{:>5.1f}%'.format(frr * 100)])
-        rows.append(['HTER (%)', '{:>5.1f}%'.format(50 * (far + frr))])
-        rows.append(['IAPMR (%)', '{:>5.1f}%'.format(100 * iapmr)])
-        click.echo(
-            tabulate(rows, headers, self._tablefmt),
-            file=self.log_file
-        )
-
-
 def _iapmr_dot(threshold, iapmr, real_data, **kwargs):
     # plot a dot on threshold versus IAPMR line and show IAPMR as a number
     axlim = mpl.axis()
@@ -185,7 +152,7 @@ class Epc(PadPlot):
         mpl.gcf().clear()
         mpl.grid()
 
-        epc(
+        plot.epc(
             licit_dev_neg, licit_dev_pos, licit_eval_neg, licit_eval_pos,
             self._points,
             color='C0', linestyle=self._linestyles[idx],
@@ -228,7 +195,6 @@ class Epc(PadPlot):
             ax1.tick_params(axis='y', colors='C0')
             ax1.spines['left'].set_color('C0')
 
-
         title = self._legends[idx] if self._legends is not None else self._title
         if title.replace(' ', ''):
             mpl.title(title)
@@ -448,7 +414,7 @@ class BaseVulnDetRoc(PadPlot):
         super(BaseVulnDetRoc, self).__init__(
             ctx, scores, evaluation, func_load)
         self._no_spoof = no_spoof
-        self._hlines_at = ctx.meta.get('hlines_at', [])
+        self._fnmrs_at = ctx.meta.get('fnmr', [])
         self._real_data = True if real_data is None else real_data
         self._legend_loc = None
 
@@ -469,7 +435,7 @@ class BaseVulnDetRoc(PadPlot):
         if not self._no_spoof and spoof_neg is not None:
             ax1 = mpl.gca()
             ax2 = ax1.twiny()
-            ax2.set_xlabel('IAPMR', color='C3')
+            ax2.set_xlabel('IAPMR (%)', color='C3')
             ax2.set_xticklabels(ax2.get_xticks())
             ax2.tick_params(axis='x', colors='C3')
             ax2.xaxis.label.set_color('C3')
@@ -487,10 +453,10 @@ class BaseVulnDetRoc(PadPlot):
             )
             mpl.sca(ax1)
 
-        if self._hlines_at is None:
+        if self._fnmrs_at is None:
             return
 
-        for line in self._hlines_at:
+        for line in self._fnmrs_at:
             thres_baseline = frr_threshold(licit_neg, licit_pos, line)
 
             axlim = mpl.axis()
@@ -547,7 +513,6 @@ class BaseVulnDetRoc(PadPlot):
                 label=label_spoof
             )  # FAR point, spoof scenario
 
-
     def end_process(self):
         ''' Set title, legend, axis labels, grid colors, save figures and
         close pdf is needed '''
@@ -594,9 +559,9 @@ class DetVuln(BaseVulnDetRoc):
     def __init__(self, ctx, scores, evaluation, func_load, real_data,
                  no_spoof):
         super(DetVuln, self).__init__(ctx, scores, evaluation, func_load,
-                                  real_data, no_spoof)
-        self._x_label = self._x_label or "FMR"
-        self._y_label = self._y_label or "FNMR"
+                                      real_data, no_spoof)
+        self._x_label = self._x_label or "FMR (%)"
+        self._y_label = self._y_label or "FNMR (%)"
         add = ''
         if not self._no_spoof:
             add = " and overlaid SPOOF scenario"
@@ -605,16 +570,16 @@ class DetVuln(BaseVulnDetRoc):
 
     def _set_axis(self):
         if self._axlim is not None and None not in self._axlim:
-            det_axis(self._axlim)
+            plot.det_axis(self._axlim)
         else:
-            det_axis([0.01, 99, 0.01, 99])
+            plot.det_axis([0.01, 99, 0.01, 99])
 
     def _get_farfrr(self, x, y, thres):
         points = farfrr(x, y, thres)
         return points, [ppndf(i) for i in points]
 
     def _plot(self, x, y, points, **kwargs):
-        det(
+        plot.det(
             x, y, points,
             color=kwargs.get('color'),
             linestyle=kwargs.get('linestyle'),
@@ -639,9 +604,9 @@ class RocVuln(BaseVulnDetRoc):
         self._legend_loc = self._legend_loc or best_legend
 
     def _plot(self, x, y, points, **kwargs):
-        roc_for_far(
+        plot.roc_for_far(
             x, y,
-            far_values=log_values(self._min_dig or -4),
+            far_values=plot.log_values(self._min_dig or -4),
             CAR=self._semilogx,
             color=kwargs.get('color'), linestyle=kwargs.get('linestyle'),
             label=kwargs.get('label')
@@ -694,8 +659,8 @@ class FmrIapmr(PadPlot):
         title = self._title if self._title is not None else "FMR vs IAPMR"
         if title.replace(' ', ''):
             mpl.title(title)
-        mpl.xlabel(self._x_label or "FMR (%)")
-        mpl.ylabel(self._y_label or "IAPMR (%)")
+        mpl.xlabel(self._x_label or "FMR")
+        mpl.ylabel(self._y_label or "IAPMR")
         mpl.grid(True, color=self._grid_color)
         if self._disp_legend:
             mpl.legend(loc=self._legend_loc)
diff --git a/bob/pad/base/test/test_commands.py b/bob/pad/base/test/test_commands.py
index 20000ac228219cb9fb64551ca79e0c9a6ed55400..b980fcad4eac09e3a60d035d3d6cc7213472e09b 100644
--- a/bob/pad/base/test/test_commands.py
+++ b/bob/pad/base/test/test_commands.py
@@ -1,11 +1,8 @@
-import sys
-import click
 from click.testing import CliRunner
 import pkg_resources
 from ..script import (pad_commands, vuln_commands)
 
 
-
 def test_det_pad():
     licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
                                                 'data/licit/scores-dev')
@@ -19,7 +16,6 @@ def test_det_pad():
         assert result.exit_code == 0, (result.exit_code, result.output)
 
 
-
 def test_det_vuln():
     licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
                                                 'data/licit/scores-dev')
@@ -31,7 +27,7 @@ def test_det_vuln():
                                                  'data/spoof/scores-eval')
     runner = CliRunner()
     with runner.isolated_filesystem():
-        result = runner.invoke(vuln_commands.det, ['-hla', '0.2',
+        result = runner.invoke(vuln_commands.det, ['-fnmr', '0.2',
                                                    '-o',
                                                    'DET.pdf',
                                                    licit_dev, licit_test,
@@ -39,7 +35,6 @@ def test_det_vuln():
         assert result.exit_code == 0, (result.exit_code, result.output)
 
 
-
 def test_fmr_iapmr_vuln():
     licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
                                                 'data/licit/scores-dev')
@@ -64,7 +59,6 @@ def test_fmr_iapmr_vuln():
         assert result.exit_code == 0, (result.exit_code, result.output)
 
 
-
 def test_hist_pad():
     licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
                                                 'data/licit/scores-dev')
@@ -96,7 +90,6 @@ def test_hist_pad():
         assert result.exit_code == 0, (result.exit_code, result.output)
 
 
-
 def test_hist_vuln():
     licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
                                                 'data/licit/scores-dev')
@@ -123,28 +116,6 @@ def test_hist_vuln():
         assert result.exit_code == 0, (result.exit_code, result.output)
 
 
-
-
-def test_metrics_vuln():
-    licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
-                                                'data/licit/scores-dev')
-    licit_test = pkg_resources.resource_filename('bob.pad.base.test',
-                                                 'data/licit/scores-eval')
-    spoof_dev = pkg_resources.resource_filename('bob.pad.base.test',
-                                                'data/spoof/scores-dev')
-    spoof_test = pkg_resources.resource_filename('bob.pad.base.test',
-                                                 'data/spoof/scores-eval')
-    runner = CliRunner()
-    with runner.isolated_filesystem():
-        result = runner.invoke(
-            vuln_commands.metrics,
-            ['--criterion', 'eer', licit_dev, licit_test,
-             spoof_dev, spoof_test]
-        )
-        assert result.exit_code == 0, (result.exit_code, result.output)
-
-
-
 def test_metrics_pad():
     licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
                                                 'data/licit/scores-dev')
@@ -159,7 +130,6 @@ def test_metrics_pad():
         assert result.exit_code == 0, (result.exit_code, result.output)
 
 
-
 def test_epc_vuln():
     licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
                                                 'data/licit/scores-dev')
@@ -199,7 +169,7 @@ def test_epsc_vuln():
                                ['--output', 'epsc.pdf',
                                 licit_dev, licit_test,
                                 spoof_dev, spoof_test])
-        assert result.exit_code == 0, (result.exit_code, result.output)
+        assert result.exit_code == 0, (result.exit_code, result.output, result.exception)
 
         result = runner.invoke(vuln_commands.epsc,
                                ['--output', 'epsc.pdf', '-I',
@@ -221,8 +191,7 @@ def test_epsc_vuln():
         assert result.exit_code == 0, (result.exit_code, result.output)
 
 
-
-def test_evaluate_vuln():
+def test_evaluate_pad():
     licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
                                                 'data/licit/scores-dev')
     licit_test = pkg_resources.resource_filename('bob.pad.base.test',
@@ -236,20 +205,3 @@ def test_evaluate_vuln():
         result = runner.invoke(pad_commands.evaluate,
                                [licit_dev, licit_test, spoof_dev, spoof_test])
         assert result.exit_code == 0, (result.exit_code, result.output)
-
-
-
-def test_evaluate_vuln():
-    licit_dev = pkg_resources.resource_filename('bob.pad.base.test',
-                                                'data/licit/scores-dev')
-    licit_test = pkg_resources.resource_filename('bob.pad.base.test',
-                                                 'data/licit/scores-eval')
-    spoof_dev = pkg_resources.resource_filename('bob.pad.base.test',
-                                                'data/spoof/scores-dev')
-    spoof_test = pkg_resources.resource_filename('bob.pad.base.test',
-                                                 'data/spoof/scores-eval')
-    runner = CliRunner()
-    with runner.isolated_filesystem():
-        result = runner.invoke(vuln_commands.evaluate,
-                               [licit_dev, licit_test, spoof_dev, spoof_test])
-        assert result.exit_code == 0, (result.exit_code, result.output)
diff --git a/setup.py b/setup.py
index 4a3ef7ae4f6c1576dd7f79461ff055532d40c22a..b40f1057bac8c2d953c2ffcf0ccec7b240a610d6 100644
--- a/setup.py
+++ b/setup.py
@@ -151,7 +151,6 @@ setup(
 
         # bob vuln scripts
         'bob.vuln.cli': [
-            'metrics          = bob.pad.base.script.vuln_commands:metrics',
             'hist             = bob.pad.base.script.vuln_commands:hist',
             'det              = bob.pad.base.script.vuln_commands:det',
             'roc              = bob.pad.base.script.vuln_commands:roc',
@@ -159,7 +158,6 @@ setup(
             'epsc             = bob.pad.base.script.vuln_commands:epsc',
             'gen              = bob.pad.base.script.vuln_commands:gen',
             'fmr_iapmr        = bob.pad.base.script.vuln_commands:fmr_iapmr',
-            'evaluate         = bob.pad.base.script.vuln_commands:evaluate',
         ],
 
     },