Commit c83442c1 authored by Theophile GENTILHOMME


Change test -> eval, set different default tablefmt option, add and remove options, modify tests accordingly, and modify base MeasureBase class so that it can handle several dev/eval scores for each system (e.g. licit/spoofing for PAD)
parent 3a560776
......@@ -11,34 +11,60 @@ from bob.extension.scripts.click_helper import verbosity_option
logger = logging.getLogger(__name__)
def scores_argument(test_mandatory=False, **kwargs):
'''Get the argument for scores, and add `dev-scores` and `test-scores` in
the context if `--test` flag is on (default `--no-test`).'''
def scores_argument(eval_mandatory=False, min_len=1, **kwargs):
"""Get the argument for scores, and add `dev-scores` and `eval-scores` in
the context when `--evaluation` flag is on (default)
Parameters
----------
eval_mandatory :
If evaluation files are mandatory
min_len :
The minimum number of input score files that are needed. If
eval_mandatory is True, this quantity is multiplied by 2.
Returns
-------
Click option
"""
def custom_scores_argument(func):
def callback(ctx, param, value):
length = len(value)
if length < 1:
raise click.BadParameter('No scores provided', ctx=ctx)
min_arg = min_len or 1
ctx.meta['min_arg'] = min_arg
if length < min_arg:
raise click.BadParameter(
'You must provide at least %d score files' % min_arg,
ctx=ctx
)
else:
div = 1
ctx.meta['scores'] = value
if test_mandatory or ctx.meta['test']:
div = 2
if (length % 2) != 0:
pref = 'T' if test_mandatory else ('When `--test` flag'
' is on t')
step = 1
if eval_mandatory or ctx.meta['evaluation']:
step = 2
if (length % (min_arg * 2)) != 0:
pref = 'T' if eval_mandatory else \
('When `--evaluation` flag is on t')
raise click.BadParameter(
'%sest-score(s) must '
'be provided along with dev-score(s)' % pref, ctx=ctx
'be provided along with dev-score(s). '
'You must provide at least %d score files.' \
% (pref, min_arg * 2), ctx=ctx
)
else:
ctx.meta['dev-scores'] = [value[i] for i in
range(length) if not i % 2]
ctx.meta['test-scores'] = [value[i] for i in
range(length) if i % 2]
ctx.meta['n_sys'] = len(ctx.meta['test-scores'])
for arg in range(min_arg):
ctx.meta['dev_scores_%d' % arg] = [
value[i] for i in range(arg * step, length,
min_arg * step)
]
if step > 1:
ctx.meta['eval_scores_%d' % arg] = [
value[i] for i in range((arg * step + 1),
length, min_arg * step)
]
ctx.meta['n_sys'] = len(ctx.meta['dev_scores_0'])
if 'titles' in ctx.meta and \
len(ctx.meta['titles']) != len(value) / div:
len(ctx.meta['titles']) != ctx.meta['n_sys']:
raise click.BadParameter(
'#titles not equal to #systems', ctx=ctx
)
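# Illustrative sketch (not part of the commit): how the callback above slices
# the positional score files with the default ``min_len=1`` and the
# ``--evaluation`` flag on (``min_arg == 1``, ``step == 2``).
value = ('dev-1.txt', 'eval-1.txt', 'dev-2.txt', 'eval-2.txt')
min_arg, step, length = 1, 2, len(value)
dev_scores_0 = [value[i] for i in range(0, length, min_arg * step)]
eval_scores_0 = [value[i] for i in range(1, length, min_arg * step)]
assert dev_scores_0 == ['dev-1.txt', 'dev-2.txt']
assert eval_scores_0 == ['eval-1.txt', 'eval-2.txt']
assert len(dev_scores_0) == 2  # stored as ctx.meta['n_sys']
# With ``min_len=2`` (e.g. licit/spoof scores for PAD) and evaluation on,
# files are consumed in groups of four: dev_scores_0/eval_scores_0 hold the
# first dev/eval pair of each group and dev_scores_1/eval_scores_1 the second.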
......@@ -73,25 +99,41 @@ def bool_option(name, short_name, desc, dflt=False, **kwargs):
show_default=True, callback=callback, is_eager=True, **kwargs)(func)
return custom_bool_option
def test_option(**kwargs):
'''Get option flag to say if test-scores are provided'''
return bool_option('test', 't', 'If set, test scores must be provided')
def eval_option(**kwargs):
'''Get option flag to say if eval-scores are provided'''
return bool_option(
'evaluation', 'e', 'If set, evaluation scores must be provided',
dflt=True
)
def sep_dev_test_option(dflt=True, **kwargs):
'''Get option flag to say if dev and test plots should be in different
def sep_dev_eval_option(dflt=True, **kwargs):
'''Get option flag to say if dev and eval plots should be in different
plots'''
return bool_option(
'split', 's','If set, test and dev curve in different plots', dflt
'split', 's', 'If set, evaluation and dev curves in different plots',
dflt
)
def cmc_option(**kwargs):
'''Get option flag to say if CMC score files are provided'''
return bool_option('cmc', 'C', 'If set, CMC score files are provided')
def semilogx_option(dflt= False, **kwargs):
def semilogx_option(dflt=False, **kwargs):
'''Option to use semilog X-axis'''
return bool_option('semilogx', 'G', 'If set, use semilog on X axis', dflt)
def show_dev_option(dflt=False, **kwargs):
'''Option to tell whether to show dev histograms'''
return bool_option('show-dev', 'D', 'If set, show dev histograms', dflt)
def print_filenames_option(dflt=False, **kwargs):
'''Option to tell if filenames should be in the title'''
return bool_option('show-fn', 'P', 'If set, show filenames in title', dflt)
def const_layout_option(dflt=True, **kwargs):
'''Option to set matplotlib constrained_layout'''
return bool_option('clayout', 'Y', '(De)Activate constrained layout', dflt)
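# Illustrative usage sketch (hypothetical command, not part of the commit):
# the boolean option factories above are meant to be stacked as decorators
# on a click command; each parsed value is also mirrored into ``ctx.meta``.
import click


@click.command()
@eval_option()
@sep_dev_eval_option()
@semilogx_option()
@click.pass_context
def hypothetical_plot(ctx, evaluation, split, semilogx, **kwargs):
    # e.g. ``hypothetical_plot --no-evaluation -s`` gives
    # evaluation=False, split=True, semilogx=False
    click.echo('evaluation=%s split=%s semilogx=%s'
               % (evaluation, split, semilogx))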
def list_float_option(name, short_name, desc, nitems=None, dflt=None, **kwargs):
'''Get option to get a list of float f
Parameters
......@@ -255,20 +297,14 @@ def table_option(**kwargs):
'''
def custom_table_option(func):
def callback(ctx, param, value):
if value is not None:
ctx.meta['tablefmt'] = value
elif 'log' in ctx.meta and ctx.meta['log'] is not None:
value = 'latex'
else:
value = 'rst'
ctx.meta['tablefmt'] = value
return value
return click.option(
'--tablefmt', type=click.STRING, default=None,
'--tablefmt', type=click.STRING, default='rst',
show_default=True, help='Format for table display: `plain`, '
'`simple`, `grid`, `fancy_grid`, `pipe`, `orgtbl`, '
'`jira`, `presto`, `psql`, (default) `rst`, `mediawiki`, `moinmoin`, '
'`youtrack`, `html`, (default with `--log`)`latex`, '
'`jira`, `presto`, `psql`, `rst`, `mediawiki`, `moinmoin`, '
'`youtrack`, `html`, `latex`, '
'`latex_raw`, `latex_booktabs`, `textile`',
callback=callback,**kwargs)(func)
return custom_table_option
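# Illustrative sketch (not part of the commit): the parsed ``tablefmt`` value
# is a format name understood by the ``tabulate`` package (the names listed
# in the help text above are tabulate formats); ``rst`` is now the default
# instead of switching to ``latex`` when ``--log`` is used.
from tabulate import tabulate

headers = ['', 'Development dev-1', 'Eval. eval-1']
rows = [['FMR', '6.263% (31/495)', '5.637% (27/479)'],
        ['FNMR', '6.208% (28/451)', '6.131% (29/473)']]
print(tabulate(rows, headers, tablefmt='rst'))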
......@@ -308,7 +344,7 @@ def output_plot_metric_option(**kwargs):
return custom_output_plot_file_option
def open_file_mode_option(**kwargs):
'''Get the top option for matplotlib'''
'''Get the file open-mode option'''
def custom_open_file_mode_option(func):
def callback(ctx, param, value):
if value not in ['w', 'a', 'w+', 'a+']:
......@@ -385,7 +421,7 @@ def label_option(name_option='x_label', **kwargs):
def custom_label_option(func):
def callback(ctx, param, value):
''' Get and save the labels list in the context '''
ctx.meta[name_option] = value if value is None else \
ctx.meta[name_option.replace('-', '_')] = value if value is None else \
[int(i) for i in value.split(',')]
return value
return click.option(
......@@ -457,20 +493,6 @@ def marker_style_option(**kwargs):
callback=callback, **kwargs)(func)
return custom_marker_style_option
def top_option(**kwargs):
'''Get the top option for matplotlib'''
def custom_top_option(func):
def callback(ctx, param, value):
ctx.meta['top'] = value
return value
return click.option(
'--top', type=FLOAT,
help='To give to ``plt.subplots_adjust(top=top)``. If given, first'
' plt.tight_layout is called. If you want to tight_layout to be '
'called, then you need to provide this option.',
callback=callback, **kwargs)(func)
return custom_top_option
def titles_option(**kwargs):
'''Get the titles option for the different systems'''
def custom_titles_option(func):
......@@ -480,7 +502,7 @@ def titles_option(**kwargs):
ctx.meta['titles'] = value
return value
return click.option(
'--titles', type=click.STRING, default=None,
'-t', '--titles', type=click.STRING, default=None,
help='The title for each system comma separated. '
'Example: --titles ISV,CNN',
callback=callback, **kwargs)(func)
......
......@@ -10,7 +10,7 @@ from .script import commands
def test_metrics():
dev1 = bob.io.base.test_utils.datafile('dev-1.txt', 'bob.measure')
runner = CliRunner()
result = runner.invoke(commands.metrics, [dev1])
result = runner.invoke(commands.metrics, ['--no-evaluation', dev1])
with runner.isolated_filesystem():
with open('tmp', 'w') as f:
f.write(result.output)
......@@ -22,7 +22,7 @@ def test_metrics():
test2 = bob.io.base.test_utils.datafile('test-2.txt', 'bob.measure')
with runner.isolated_filesystem():
result = runner.invoke(
commands.metrics, ['--test', dev1, test1, dev2, test2]
commands.metrics, [dev1, test1, dev2, test2]
)
with open('tmp', 'w') as f:
f.write(result.output)
......@@ -30,12 +30,13 @@ def test_metrics():
assert result.exit_code == 0
with runner.isolated_filesystem():
result = runner.invoke(
commands.metrics, ['-l', 'tmp', '--test', dev1, test1, dev2, test2]
commands.metrics, ['-l', 'tmp', dev1, test1, dev2, test2, '-t',
'A,B']
)
assert result.exit_code == 0
with runner.isolated_filesystem():
result = runner.invoke(
commands.metrics, ['-l', 'tmp', '--test', dev1, dev2]
commands.metrics, ['-l', 'tmp', '--no-evaluation', dev1, dev2]
)
assert result.exit_code == 0
......@@ -43,7 +44,8 @@ def test_roc():
dev1 = bob.io.base.test_utils.datafile('dev-1.txt', 'bob.measure')
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(commands.roc, ['--output','test.pdf',dev1])
result = runner.invoke(commands.roc, ['--no-evaluation', '--output',
'test.pdf',dev1])
if result.output:
click.echo(result.output)
assert result.exit_code == 0
......@@ -51,7 +53,7 @@ def test_roc():
test1 = bob.io.base.test_utils.datafile('test-1.txt', 'bob.measure')
test2 = bob.io.base.test_utils.datafile('test-2.txt', 'bob.measure')
with runner.isolated_filesystem():
result = runner.invoke(commands.roc, ['--test', '--split', '--output',
result = runner.invoke(commands.roc, ['--split', '--output',
'test.pdf',
dev1, test1, dev2, test2])
if result.output:
......@@ -59,7 +61,7 @@ def test_roc():
assert result.exit_code == 0
with runner.isolated_filesystem():
result = runner.invoke(commands.roc, ['--test', '--output',
result = runner.invoke(commands.roc, ['--output',
'test.pdf', '--titles', 'A,B',
dev1, test1, dev2, test2])
if result.output:
......@@ -71,7 +73,7 @@ def test_det():
dev1 = bob.io.base.test_utils.datafile('dev-1.txt', 'bob.measure')
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(commands.det, [dev1])
result = runner.invoke(commands.det, ['--no-evaluation', dev1])
if result.output:
click.echo(result.output)
assert result.exit_code == 0
......@@ -79,14 +81,14 @@ def test_det():
test1 = bob.io.base.test_utils.datafile('test-1.txt', 'bob.measure')
test2 = bob.io.base.test_utils.datafile('test-2.txt', 'bob.measure')
with runner.isolated_filesystem():
result = runner.invoke(commands.det, ['--test', '--split', '--output',
result = runner.invoke(commands.det, ['--split', '--output',
'test.pdf', '--titles', 'A,B',
dev1, test1, dev2, test2])
if result.output:
click.echo(result.output)
assert result.exit_code == 0
with runner.isolated_filesystem():
result = runner.invoke(commands.det, ['--test', '--output',
result = runner.invoke(commands.det, ['--output',
'test.pdf',
dev1, test1, dev2, test2])
if result.output:
......@@ -119,15 +121,15 @@ def test_hist():
test2 = bob.io.base.test_utils.datafile('test-2.txt', 'bob.measure')
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(commands.hist, [dev1])
result = runner.invoke(commands.hist, ['--no-evaluation', dev1])
if result.output:
click.echo(result.output)
assert result.exit_code == 0
with runner.isolated_filesystem():
result = runner.invoke(commands.hist, ['--criter', 'hter','--output',
'HISTO.pdf', '-b', 30,
dev1, dev2])
result = runner.invoke(commands.hist, ['--no-evaluation', '--criter', 'hter',
'--output', 'HISTO.pdf', '-b',
30, dev1, dev2])
if result.output:
click.echo(result.output)
assert result.exit_code == 0
......@@ -148,19 +150,16 @@ def test_evaluate():
test2 = bob.io.base.test_utils.datafile('test-2.txt', 'bob.measure')
runner = CliRunner()
with runner.isolated_filesystem():
result = runner.invoke(commands.evaluate, [dev1])
result = runner.invoke(commands.evaluate, ['--no-evaluation', dev1])
assert result.exit_code == 0
with runner.isolated_filesystem():
result = runner.invoke(
commands.evaluate, ['--output', 'my_plots.pdf', '-b', 30,
'-n', 300, dev1, dev2])
commands.evaluate, ['--no-evaluation', '--output', 'my_plots.pdf', '-b',
30, '-n', 300, dev1, dev2])
assert result.exit_code == 0
with runner.isolated_filesystem():
result = runner.invoke(
commands.evaluate, ['-t', dev1, test1, dev2, test2])
commands.evaluate, [dev1, test1, dev2, test2])
assert result.exit_code == 0
......@@ -75,7 +75,8 @@ The optimal threshold :math:`\tau^*` is then computed using different values of
where :math:`\mathcal{D}_{d}` denotes the development set and should be
completely separate from the evaluation set :math:`\mathcal{D}`.
Performance for different values of :math:`\beta` is then computed on the test
Performance for different values of :math:`\beta` is then computed on the
evaluation
set :math:`\mathcal{D}_{t}` using the previously derived threshold. Note that
setting :math:`\beta` to 0.5 yields the Half Total Error Rate (HTER) as
defined in the first equation.
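For reference, the weighted criterion and the HTER can be written as follows
(a recap in illustrative notation; the guide's earlier equations are
authoritative):

.. math::

   \mathrm{WER}_{\beta}(\tau) = \beta \cdot \mathrm{FAR}(\tau)
       + (1 - \beta) \cdot \mathrm{FRR}(\tau),
   \qquad
   \mathrm{HTER}(\tau) = \frac{\mathrm{FAR}(\tau) + \mathrm{FRR}(\tau)}{2}
       = \mathrm{WER}_{0.5}(\tau)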
......@@ -134,7 +135,7 @@ We do provide a method to calculate the FAR and FRR in a single shot:
The threshold ``T`` is normally calculated by looking at the distribution of
negatives and positives in a development (or validation) set, selecting a
threshold that matches a certain criterion and applying this derived threshold
to the test (or evaluation) set. This technique gives a better overview of the
to the evaluation set. This technique gives a better overview of the
generalization of a method. We implement different techniques for the
calculation of the threshold:
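(The list of available criteria continues in the guide.) As a minimal,
illustrative sketch of this dev-then-eval workflow, using the
:py:func:`bob.measure.eer_threshold` and :py:func:`bob.measure.farfrr`
functions on purely synthetic scores:

.. code-block:: python

   import numpy
   import bob.measure

   # synthetic development and evaluation scores (illustration only)
   dev_neg = numpy.random.normal(-1.0, 1.0, 100)
   dev_pos = numpy.random.normal(+1.0, 1.0, 100)
   eval_neg = numpy.random.normal(-1.0, 1.0, 100)
   eval_pos = numpy.random.normal(+1.0, 1.0, 100)

   # choose the threshold on the development set (EER criterion) ...
   threshold = bob.measure.eer_threshold(dev_neg, dev_pos)
   # ... then report error rates on the evaluation set with that threshold
   far, frr = bob.measure.farfrr(eval_neg, eval_pos, threshold)
   hter = (far + frr) / 2.0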
......@@ -363,7 +364,7 @@ EPC
===
Drawing an EPC requires that both the development set negatives and positives are provided alongside
the test (or evaluation) set ones. Because of this the API is slightly modified:
the evaluation set ones. Because of this the API is slightly modified:
.. doctest::
......@@ -503,9 +504,9 @@ and FRs are also displayed between parenthesis.
.. note::
Several score files can be given at once and the metrics will be computed
for each of them separatly. Development and test files must be given by
pairs and the ``--test`` (or ``-t``) flag must be given (otherwise test
scores are treated as development scores)
for each of them separately. Development and evaluation files must be given
in pairs. When only development files are provided, the ``--no-evaluation``
flag must be given.
To evaluate the performance of a new score file with a given threshold, use
......@@ -513,10 +514,10 @@ To evaluate the performance of a new score file with a given threshold, use
.. code-block:: sh
$ bob measure metrics --thres 0.006 test-1.txt
[Min. criterion: user provider] Threshold on Development set `test-1`: 6.000000e-03
$ bob measure metrics --thres 0.006 eval-1.txt
[Min. criterion: user provider] Threshold on Development set `eval-1`: 6.000000e-03
==== ====================
.. Development test-1
.. Development eval-1
==== ====================
FMR 5.010% (24/479)
FNMR 6.977% (33/473)
......@@ -526,14 +527,14 @@ To evaluate the performance of a new score file with a given threshold, use
==== ====================
You can simultaneously compute the threshold and assess its performance
on a test set:
on an evaluation set:
.. code-block:: sh
$ bob measure metrics --test dev-1.txt test-1.txt
$ bob measure metrics dev-1.txt eval-1.txt
[Min. criterion: EER] Threshold on Development set `dev-1`: -8.025286e-03
==== =================== ===============
.. Development dev-1 Test test-1
.. Development dev-1 Eval. eval-1
==== =================== ===============
FMR 6.263% (31/495) 5.637% (27/479)
FNMR 6.208% (28/451) 6.131% (29/473)
......@@ -554,7 +555,7 @@ Plots
=====
Customizable plotting commands are available in the :py:mod:`bob.measure` module.
They take a list of development and/or test files and generate a single PDF
They take a list of development and/or evaluation files and generate a single PDF
file containing the plots. Available plots are:
* ``roc`` (receiver operating characteristic)
......@@ -568,12 +569,12 @@ file containing the plots. Available plots are:
Use the ``--help`` option on the above-cited commands to find out more about
the available options.
For example, to generate a DET curve from development and test datasets:
For example, to generate a DET curve from development and evaluation datasets:
.. code-block:: sh
$bob measure det --test --split --output 'my_det.pdf' dev-1.txt test-1.txt
dev-2.txt test-2.txt
$ bob measure det --split --output 'my_det.pdf' dev-1.txt eval-1.txt
dev-2.txt eval-2.txt
where `my_det.pdf` will contain DET plots for the two experiments.
......@@ -593,9 +594,9 @@ experiment. For example:
.. code-block:: sh
$ bob measure evaluate -t -l 'my_metrics.txt' -o 'my_plots.pdf' {sys1,sys2}/
{test,dev}
{eval,dev}
will output metrics and plots for the two experiments (dev and test pairs) in
will output metrics and plots for the two experiments (dev and eval pairs) in
`my_metrics.txt` and `my_plots.pdf`, respectively.
.. include:: links.rst
......