Commit c83442c1 authored by Theophile GENTILHOMME's avatar Theophile GENTILHOMME

Change test -> eval, set different default tablefmt option, add and remove
options, modify tests accordingly, and modify the base MeasureBase class so
that it can handle several dev/eval scores for each system (e.g.
licit/spoofing for PAD)
parent 3a560776
@@ -10,22 +10,23 @@ from bob.extension.scripts.click_helper import verbosity_option
 @click.command()
 @common_options.scores_argument(nargs=-1)
-@common_options.test_option()
+@common_options.eval_option()
 @common_options.table_option()
 @common_options.open_file_mode_option()
 @common_options.output_plot_metric_option()
 @common_options.criterion_option()
 @common_options.thresholds_option()
 @common_options.far_option()
 @common_options.titles_option()
 @verbosity_option()
 @click.pass_context
-def metrics(ctx, scores, test, **kwargs):
+def metrics(ctx, scores, evaluation, **kwargs):
     """Prints a single output line that contains all info for a given
     criterion (eer or hter).
     You need to provide one or more development score file(s) for each experiment.
-    You can also provide test files along with dev files but the flag `--test`
-    is required in that case.
+    You can also provide evaluation files along with dev files. If only dev
+    scores are provided, you must use the `--no-evaluation` flag.
     The resulting table format can be changed using `--tablefmt`. Default
     formats are `fancy_grid` when output in the terminal and `latex` when
@@ -34,47 +35,48 @@ def metrics(ctx, scores, test, **kwargs):
     Examples:
         $ bob measure metrics dev-scores
-        $ bob measure metrics --test -l results.txt dev-scores1 test-scores1
+        $ bob measure metrics -l results.txt dev-scores1 eval-scores1
-        $ bob measure metrics --test {dev,test}-scores1 {dev,test}-scores2
+        $ bob measure metrics {dev,eval}-scores1 {dev,eval}-scores2
     """
-    process = figure.Metrics(ctx, scores, test, load.split_files)
+    process = figure.Metrics(ctx, scores, evaluation, load.split_files)
     process.run()

 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.titles_option()
-@common_options.sep_dev_test_option()
+@common_options.sep_dev_eval_option()
 @common_options.output_plot_file_option(default_out='roc.pdf')
-@common_options.test_option()
+@common_options.eval_option()
 @common_options.points_curve_option()
 @common_options.semilogx_option(True)
 @common_options.axes_val_option(dflt=[1e-4, 1, 1e-4, 1])
 @common_options.axis_fontsize_option()
 @common_options.x_rotation_option()
 @common_options.fmr_line_at_option()
+@common_options.const_layout_option()
 @verbosity_option()
 @click.pass_context
-def roc(ctx, scores, test, **kwargs):
+def roc(ctx, scores, evaluation, **kwargs):
     """Plot ROC (receiver operating characteristic) curve:
     The plot will represent the false match rate on the horizontal axis and the
     false non match rate on the vertical axis. The values for the axis will be
     computed using :py:func:`bob.measure.roc`.
     You need to provide one or more development score file(s) for each experiment.
-    You can also provide test files along with dev files but the flag `--test`
-    is required in that case.
+    You can also provide evaluation files along with dev files. If only dev
+    scores are provided, you must use the `--no-evaluation` flag.
     Examples:
         $ bob measure roc dev-scores
-        $ bob measure roc --test dev-scores1 test-scores1 dev-scores2
-        test-scores2
+        $ bob measure roc dev-scores1 eval-scores1 dev-scores2
+        eval-scores2
-        $ bob measure roc --test -o my_roc.pdf dev-scores1 test-scores1
+        $ bob measure roc -o my_roc.pdf dev-scores1 eval-scores1
     """
-    process = figure.Roc(ctx, scores, test, load.split_files)
+    process = figure.Roc(ctx, scores, evaluation, load.split_files)
     process.run()
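For reference, the curve the `roc` command draws can be computed directly with
:py:func:`bob.measure.roc`, as the docstring above says. A minimal sketch with
synthetic scores (the exact return layout is assumed from the docstring, it is
not shown in this diff):

    import numpy
    import bob.measure

    # synthetic scores: impostors around -1, genuine users around +1
    neg = numpy.random.randn(1000) - 1.
    pos = numpy.random.randn(1000) + 1.
    # the (FMR, FNMR) points the `roc` command plots; 100 points here
    points = bob.measure.roc(neg, pos, 100)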
@@ -82,56 +84,58 @@ def roc(ctx, scores, test, **kwargs):
 @common_options.scores_argument(nargs=-1)
 @common_options.output_plot_file_option(default_out='det.pdf')
 @common_options.titles_option()
-@common_options.sep_dev_test_option()
-@common_options.test_option()
+@common_options.sep_dev_eval_option()
+@common_options.eval_option()
 @common_options.axes_val_option(dflt=[0.01, 95, 0.01, 95])
 @common_options.axis_fontsize_option(dflt=6)
 @common_options.x_rotation_option(dflt=45)
 @common_options.points_curve_option()
+@common_options.const_layout_option()
 @verbosity_option()
 @click.pass_context
-def det(ctx, scores, test, **kwargs):
+def det(ctx, scores, evaluation, **kwargs):
     """Plot DET (detection error trade-off) curve:
     modified ROC curve which plots error rates on both axes
     (false positives on the x-axis and false negatives on the y-axis)
     You need to provide one or more development score file(s) for each experiment.
-    You can also provide test files along with dev files but the flag `--test`
-    is required in that case.
+    You can also provide evaluation files along with dev files. If only dev
+    scores are provided, you must use the `--no-evaluation` flag.
     Examples:
         $ bob measure det dev-scores
-        $ bob measure det --test dev-scores1 test-scores1 dev-scores2
-        test-scores2
+        $ bob measure det dev-scores1 eval-scores1 dev-scores2
+        eval-scores2
-        $ bob measure det --test -o my_det.pdf dev-scores1 test-scores1
+        $ bob measure det -o my_det.pdf dev-scores1 eval-scores1
     """
-    process = figure.Det(ctx, scores, test, load.split_files)
+    process = figure.Det(ctx, scores, evaluation, load.split_files)
     process.run()

 @click.command()
-@common_options.scores_argument(test_mandatory=True, nargs=-1)
+@common_options.scores_argument(eval_mandatory=True, nargs=-1)
 @common_options.output_plot_file_option(default_out='epc.pdf')
 @common_options.titles_option()
 @common_options.points_curve_option()
 @common_options.axis_fontsize_option()
+@common_options.const_layout_option()
 @verbosity_option()
 @click.pass_context
 def epc(ctx, scores, **kwargs):
     """Plot EPC (expected performance curve):
-    plots the error rate on the test set depending on a threshold selected
+    plots the error rate on the eval set depending on a threshold selected
     a-priori on the development set and accounts for varying relative cost
     in [0; 1] of FPR and FNR when calculating the threshold.
-    You need to provide one or more development score and test file(s)
+    You need to provide one or more development score and eval file(s)
     for each experiment.
     Examples:
-        $ bob measure epc dev-scores test-scores
+        $ bob measure epc dev-scores eval-scores
-        $ bob measure epc -o my_epc.pdf dev-scores1 test-scores1
+        $ bob measure epc -o my_epc.pdf dev-scores1 eval-scores1
     """
     process = figure.Epc(ctx, scores, True, load.split_files)
     process.run()
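What the `epc` command plots can likewise be sketched directly with
:py:func:`bob.measure.epc` (a sketch with synthetic scores; the signature is
assumed from this package's API, it is not part of this diff):

    import numpy
    import bob.measure

    dev_neg = numpy.random.randn(1000) - 1.
    dev_pos = numpy.random.randn(1000) + 1.
    eval_neg = numpy.random.randn(1000) - 1.
    eval_pos = numpy.random.randn(1000) + 1.
    # each point pairs a relative cost alpha with the eval-set error rate at
    # the threshold selected a-priori on the dev set for that alpha
    curve = bob.measure.epc(dev_neg, dev_pos, eval_neg, eval_pos, 100)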
@@ -140,55 +144,64 @@ def epc(ctx, scores, **kwargs):
 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.output_plot_file_option(default_out='hist.pdf')
-@common_options.test_option()
+@common_options.eval_option()
 @common_options.n_bins_option()
 @common_options.criterion_option()
 @common_options.axis_fontsize_option()
 @common_options.thresholds_option()
+@common_options.const_layout_option()
+@common_options.show_dev_option()
+@common_options.print_filenames_option()
 @verbosity_option()
 @click.pass_context
-def hist(ctx, scores, test, **kwargs):
+def hist(ctx, scores, evaluation, **kwargs):
     """Plots histograms of positives and negatives along with threshold
     criterion.
     You need to provide one or more development score file(s) for each experiment.
-    You can also provide test files along with dev files but the flag `--test`
-    is required in that case.
+    You can also provide evaluation files along with dev files. If only dev
+    scores are provided, you must use the `--no-evaluation` flag.
+    By default, when eval-scores are given, only eval-scores histograms are
+    displayed, with the threshold line computed from dev-scores. If you want
+    to display dev-scores distributions as well, use the ``--show-dev``
+    option.
     Examples:
         $ bob measure hist dev-scores
-        $ bob measure hist --test dev-scores1 test-scores1 dev-scores2
-        test-scores2
+        $ bob measure hist dev-scores1 eval-scores1 dev-scores2
+        eval-scores2
-        $ bob measure hist --test --criter hter dev-scores1 test-scores1
+        $ bob measure hist --criter hter --show-dev dev-scores1 eval-scores1
     """
-    process = figure.Hist(ctx, scores, test, load.split_files)
+    process = figure.Hist(ctx, scores, evaluation, load.split_files)
     process.run()

 @click.command()
 @common_options.scores_argument(nargs=-1)
 @common_options.titles_option()
-@common_options.sep_dev_test_option()
+@common_options.sep_dev_eval_option()
 @common_options.table_option()
-@common_options.test_option()
+@common_options.eval_option()
 @common_options.output_plot_metric_option()
 @common_options.output_plot_file_option(default_out='eval_plots.pdf')
 @common_options.points_curve_option()
 @common_options.semilogx_option(dflt=True)
 @common_options.n_bins_option()
 @common_options.fmr_line_at_option()
+@common_options.const_layout_option()
 @verbosity_option()
 @click.pass_context
-def evaluate(ctx, scores, test, **kwargs):
+def evaluate(ctx, scores, evaluation, **kwargs):
     '''Runs error analysis on score sets
     \b
     1. Computes the threshold using either EER or min. HTER criteria on
        development set scores
-    2. Applies the above threshold on test set scores to compute the HTER, if
-       a test-score set is provided
+    2. Applies the above threshold on evaluation set scores to compute the
+       HTER, if an eval-score set is provided
     3. Reports error rates on the console
     4. Plots ROC, EPC, DET curves and score distributions to a multi-page PDF
        file
@@ -203,25 +216,25 @@ def evaluate(ctx, scores, test, **kwargs):
     Examples:
         $ bob measure evaluate dev-scores
-        $ bob measure evaluate -t -l metrics.txt -o my_plots.pdf dev-scores test-scores
+        $ bob measure evaluate -l metrics.txt -o my_plots.pdf dev-scores eval-scores
     '''
     # first time, erase if existing file
     ctx.meta['open_mode'] = 'w'
     click.echo("Computing metrics with EER...")
     ctx.meta['criter'] = 'eer'  # no criterion passed to evaluate
-    ctx.invoke(metrics, scores=scores, test=test)
+    ctx.invoke(metrics, scores=scores, evaluation=evaluation)
     # second time, appends the content
     ctx.meta['open_mode'] = 'a'
     click.echo("Computing metrics with HTER...")
     ctx.meta['criter'] = 'hter'  # no criterion passed in evaluate
-    ctx.invoke(metrics, scores=scores, test=test)
+    ctx.invoke(metrics, scores=scores, evaluation=evaluation)
     if 'log' in ctx.meta:
         click.echo("[metrics] => %s" % ctx.meta['log'])
     # avoid closing pdf file before all figures are plotted
     ctx.meta['closef'] = False
-    if test:
-        click.echo("Starting evaluate with dev and test scores...")
+    if evaluation:
+        click.echo("Starting evaluate with dev and eval scores...")
     else:
         click.echo("Starting evaluate with dev scores only...")
     click.echo("Computing ROC...")
@@ -229,7 +242,7 @@ def evaluate(ctx, scores, test, **kwargs):
     ctx.forward(roc)  # use class defaults plot settings
     click.echo("Computing DET...")
     ctx.forward(det)  # use class defaults plot settings
-    if test:
+    if evaluation:
         click.echo("Computing EPC...")
-        ctx.forward(epc)  # use class defaults plot settings
+        ctx.forward(epc)
@@ -11,34 +11,60 @@ from bob.extension.scripts.click_helper import verbosity_option
 logger = logging.getLogger(__name__)

-def scores_argument(test_mandatory=False, **kwargs):
-    '''Get the argument for scores, and add `dev-scores` and `test-scores` in
-    the context if `--test` flag is on (default `--no-test`).'''
+def scores_argument(eval_mandatory=False, min_len=1, **kwargs):
+    """Get the argument for scores, and add `dev-scores` and `eval-scores` in
+    the context when the `--evaluation` flag is on (default).
+
+    Parameters
+    ----------
+    eval_mandatory :
+        If evaluation files are mandatory.
+    min_len :
+        The minimum number of input files that are needed. If eval_mandatory
+        is True, this quantity is multiplied by 2.
+
+    Returns
+    -------
+    Click option
+    """
     def custom_scores_argument(func):
         def callback(ctx, param, value):
             length = len(value)
-            if length < 1:
-                raise click.BadParameter('No scores provided', ctx=ctx)
+            min_arg = min_len or 1
+            ctx.meta['min_arg'] = min_arg
+            if length < min_arg:
+                raise click.BadParameter(
+                    'You must provide at least %d score files' % min_arg,
+                    ctx=ctx
+                )
             else:
-                div = 1
                 ctx.meta['scores'] = value
-                if test_mandatory or ctx.meta['test']:
-                    div = 2
-                    if (length % 2) != 0:
-                        pref = 'T' if test_mandatory else ('When `--test` flag'
-                                                           ' is on t')
+                step = 1
+                if eval_mandatory or ctx.meta['evaluation']:
+                    step = 2
+                    if (length % (min_arg * 2)) != 0:
+                        pref = 'T' if eval_mandatory else \
+                            ('When `--evaluation` flag is on t')
                         raise click.BadParameter(
                             '%sest-score(s) must '
-                            'be provided along with dev-score(s)' % pref, ctx=ctx
+                            'be provided along with dev-score(s). '
+                            'You must provide at least %d score files.' \
+                            % (pref, min_arg * 2), ctx=ctx
                         )
-                    else:
-                        ctx.meta['dev-scores'] = [value[i] for i in
-                                                  range(length) if not i % 2]
-                        ctx.meta['test-scores'] = [value[i] for i in
-                                                   range(length) if i % 2]
-                        ctx.meta['n_sys'] = len(ctx.meta['test-scores'])
+                for arg in range(min_arg):
+                    ctx.meta['dev_scores_%d' % arg] = [
+                        value[i] for i in range(arg * step, length,
+                                                min_arg * step)
+                    ]
+                    if step > 1:
+                        ctx.meta['eval_scores_%d' % arg] = [
+                            value[i] for i in range((arg * step + 1),
+                                                    length, min_arg * step)
+                        ]
+                ctx.meta['n_sys'] = len(ctx.meta['dev_scores_0'])
             if 'titles' in ctx.meta and \
-                    len(ctx.meta['titles']) != len(value) / div:
+                    len(ctx.meta['titles']) != ctx.meta['n_sys']:
                 raise click.BadParameter(
                     '#titles not equal to #systems', ctx=ctx
                 )
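The interleaving performed by the callback above is easier to read in
isolation. A minimal standalone sketch of the same slicing (`group_scores` is
a hypothetical helper written for illustration, not part of this commit):

    def group_scores(files, min_arg=1, evaluation=True):
        """Split positional score files into per-system dev/eval groups,
        mirroring how the callback above fills ctx.meta."""
        step = 2 if evaluation else 1
        groups = {}
        for arg in range(min_arg):
            groups['dev_scores_%d' % arg] = files[arg * step::min_arg * step]
            if step > 1:
                groups['eval_scores_%d' % arg] = \
                    files[arg * step + 1::min_arg * step]
        return groups

    # two PAD systems, two score types each (licit and spoof), dev + eval:
    files = ['licit-dev1', 'licit-eval1', 'spoof-dev1', 'spoof-eval1',
             'licit-dev2', 'licit-eval2', 'spoof-dev2', 'spoof-eval2']
    print(group_scores(files, min_arg=2))
    # {'dev_scores_0': ['licit-dev1', 'licit-dev2'],
    #  'eval_scores_0': ['licit-eval1', 'licit-eval2'],
    #  'dev_scores_1': ['spoof-dev1', 'spoof-dev2'],
    #  'eval_scores_1': ['spoof-eval1', 'spoof-eval2']}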
@@ -73,25 +99,41 @@ def bool_option(name, short_name, desc, dflt=False, **kwargs):
         show_default=True, callback=callback, is_eager=True, **kwargs)(func)
     return custom_bool_option

-def test_option(**kwargs):
-    '''Get option flag to say if test-scores are provided'''
-    return bool_option('test', 't', 'If set, test scores must be provided')
+def eval_option(**kwargs):
+    '''Get option flag to say if eval-scores are provided'''
+    return bool_option(
+        'evaluation', 'e', 'If set, evaluation scores must be provided',
+        dflt=True
+    )

-def sep_dev_test_option(dflt=True, **kwargs):
-    '''Get option flag to say if dev and test plots should be in different
+def sep_dev_eval_option(dflt=True, **kwargs):
+    '''Get option flag to say if dev and eval plots should be in different
     plots'''
     return bool_option(
-        'split', 's', 'If set, test and dev curve in different plots', dflt
+        'split', 's', 'If set, evaluation and dev curve in different plots',
+        dflt
     )

 def cmc_option(**kwargs):
     '''Get option flag to say if cmc scores'''
     return bool_option('cmc', 'C', 'If set, CMC score files are provided')

-def semilogx_option(dflt= False, **kwargs):
+def semilogx_option(dflt=False, **kwargs):
     '''Option to use semilog X-axis'''
     return bool_option('semilogx', 'G', 'If set, use semilog on X axis', dflt)

+def show_dev_option(dflt=False, **kwargs):
+    '''Option to tell if dev histograms should be shown'''
+    return bool_option('show-dev', 'D', 'If set, show dev histograms', dflt)
+
+def print_filenames_option(dflt=False, **kwargs):
+    '''Option to tell if filenames should be in the title'''
+    return bool_option('show-fn', 'P', 'If set, show filenames in title', dflt)
+
+def const_layout_option(dflt=True, **kwargs):
+    '''Option to set matplotlib constrained_layout'''
+    return bool_option('clayout', 'Y', '(De)Activate constrained layout', dflt)
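The new `eval_option` replaces the old `--test` flag with an on/off pair that
defaults to on. In plain Click, the equivalent wiring looks roughly like this
(a sketch assuming `bool_option` builds a `--<name>/--no-<name>` pair; its
body is outside this hunk):

    import click

    @click.command()
    @click.option('-e', '--evaluation/--no-evaluation', default=True,
                  show_default=True,
                  help='If set, evaluation scores must be provided')
    def cmd(evaluation):
        click.echo('evaluation=%s' % evaluation)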
 def list_float_option(name, short_name, desc, nitems=None, dflt=None, **kwargs):
     '''Get option to get a list of floats
     Parameters
@@ -255,20 +297,14 @@ def table_option(**kwargs):
     '''
     def custom_table_option(func):
         def callback(ctx, param, value):
-            if value is not None:
-                ctx.meta['tablefmt'] = value
-            elif 'log' in ctx.meta and ctx.meta['log'] is not None:
-                value = 'latex'
-            else:
-                value = 'rst'
             ctx.meta['tablefmt'] = value
             return value
         return click.option(
-            '--tablefmt', type=click.STRING, default=None,
+            '--tablefmt', type=click.STRING, default='rst',
             show_default=True, help='Format for table display: `plain`, '
             '`simple`, `grid`, `fancy_grid`, `pipe`, `orgtbl`, '
-            '`jira`, `presto`, `psql`, (default) `rst`, `mediawiki`, `moinmoin`, '
-            '`youtrack`, `html`, (default with `--log`)`latex`, '
+            '`jira`, `presto`, `psql`, `rst`, `mediawiki`, `moinmoin`, '
+            '`youtrack`, `html`, `latex`, '
             '`latex_raw`, `latex_booktabs`, `textile`',
             callback=callback, **kwargs)(func)
     return custom_table_option
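With the simplified callback, `--tablefmt` now defaults to `rst`
unconditionally and the value is handed straight through. A sketch of how
such a format string is typically consumed by `tabulate` (made-up numbers,
for illustration only):

    from tabulate import tabulate

    rows = [['dev', '5.00%', '4.80%'], ['eval', '5.40%', '5.10%']]
    headers = ['set', 'FMR', 'FNMR']
    print(tabulate(rows, headers, tablefmt='rst'))    # the new default
    print(tabulate(rows, headers, tablefmt='latex'))  # e.g. for reports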
@@ -308,7 +344,7 @@ def output_plot_metric_option(**kwargs):
     return custom_output_plot_file_option

 def open_file_mode_option(**kwargs):
-    '''Get the top option for matplotlib'''
+    '''Get the open file mode option'''
     def custom_open_file_mode_option(func):
         def callback(ctx, param, value):
             if value not in ['w', 'a', 'w+', 'a+']:
@@ -385,7 +421,7 @@ def label_option(name_option='x_label', **kwargs):
     def custom_label_option(func):
         def callback(ctx, param, value):
             '''Get and save labels list in the context list'''
-            ctx.meta[name_option] = value if value is None else \
+            ctx.meta[name_option.replace('-', '_')] = value if value is None else \
                 [int(i) for i in value.split(',')]
             return value
         return click.option(
@@ -457,20 +493,6 @@ def marker_style_option(**kwargs):
         callback=callback, **kwargs)(func)
     return custom_marker_style_option

-def top_option(**kwargs):
-    '''Get the top option for matplotlib'''
-    def custom_top_option(func):
-        def callback(ctx, param, value):
-            ctx.meta['top'] = value
-            return value
-        return click.option(
-            '--top', type=FLOAT,
-            help='To give to ``plt.subplots_adjust(top=top)``. If given, first'
-            ' plt.tight_layout is called. If you want to tight_layout to be '
-            'called, then you need to provide this option.',
-            callback=callback, **kwargs)(func)
-    return custom_top_option

 def titles_option(**kwargs):
     '''Get the titles option for the different systems'''
     def custom_titles_option(func):
@@ -480,7 +502,7 @@ def titles_option(**kwargs):
             ctx.meta['titles'] = value
             return value
         return click.option(
-            '--titles', type=click.STRING, default=None,
+            '-t', '--titles', type=click.STRING, default=None,
             help='The title for each system comma separated. '
             'Example: --titles ISV,CNN',
             callback=callback, **kwargs)(func)
@@ -38,20 +38,23 @@ class MeasureBase(object):
     ----------
     _scores: :any:`list`:
-        List of input files (e.g. dev-{1, 2, 3}, {dev,test}-scores1)
+        List of input files (e.g. dev-{1, 2, 3}, {dev,eval}-scores1)
     _ctx : :py:class:`dict`
         Click context dictionary.
-    _test : :py:class:`bool`
-        True if test data are used
+    _eval : :py:class:`bool`
+        True if eval data are used
+    _titles: :any:`list`
+        List of titles for each system (dev + (eval) scores)
     func_load:
         Function that is used to load the input files
     """
     __metaclass__ = ABCMeta  # for python 2.7 compatibility

-    def __init__(self, ctx, scores, test, func_load):
+    def __init__(self, ctx, scores, eval, func_load):
         """
         Parameters
         ----------
@@ -60,18 +63,24 @@ class MeasureBase(object):
             Click context dictionary.
         scores : :any:`list`:
-            List of input files (e.g. dev-{1, 2, 3}, {dev,test}-scores1
-            {dev,test}-scores2)
-        test : :py:class:`bool`
-            True if test data are used
+            List of input files (e.g. dev-{1, 2, 3}, {dev,eval}-scores1
+            {dev,eval}-scores2)
+        eval : :py:class:`bool`
+            True if eval data are used
         func_load : Function that is used to load the input files
         """
         self._scores = scores
+        self._min_arg = 1 if 'min_arg' not in ctx.meta else ctx.meta['min_arg']
         self._ctx = ctx
         self.func_load = func_load
-        self.dev_names, self.test_names, self.dev_scores, self.test_scores = \
+        self.dev_names, self.eval_names, self.dev_scores, self.eval_scores = \
             self._load_files()
-        self._test = test
+        self.n_systems = len(self.dev_names[0])  # at least one set of dev scores
+        self._titles = None if 'titles' not in ctx.meta else ctx.meta['titles']
+        if self._titles is not None and len(self._titles) != self.n_systems:
+            raise click.BadParameter("Number of titles must be equal to the "
+                                     "number of systems")
+        self._eval = eval

     def run(self):
         """Generate outputs (e.g. metrics, files, pdf plots).
@@ -85,15 +94,29 @@ class MeasureBase(object):
         # init matplotlib, log files, ...
         self.init_process()
         # iterates through the different systems and feeds `compute`
-        # with the dev (and test) scores of each system
-        for idx, (dev_score, dev_file) in enumerate(
-                zip(self.dev_scores, self.dev_names)
-        ):
-            test_score = self.test_scores[idx] if self.test_scores is not None \
-                else None
-            test_file = None if self.test_names is None else self.test_names[idx]
-            # does the main computations/plottings here
-            self.compute(idx, dev_score, dev_file, test_score, test_file)
+        # with the dev (and eval) scores of each system.
+        # Note that more than one dev or eval score set can be passed to
+        # each system
+        for idx in range(self.n_systems):
+            dev_score = []
+            eval_score = []
+            dev_file = []
+            eval_file = []
+            for arg in range(self._min_arg):
+                dev_score.append(self.dev_scores[arg][idx])
+                dev_file.append(self.dev_names[arg][idx])
+                eval_score.append(self.eval_scores[arg][idx]
+                                  if self.eval_scores[arg] is not None else None)
+                eval_file.append(self.eval_names[arg][idx]
+                                 if self.eval_names[arg] is not None else None)
+            if self._min_arg == 1:  # most measures only take one arg,
+                # so do not pass a list of one arg.
+                # does the main computations/plottings here
+                self.compute(idx, dev_score[0], dev_file[0], eval_score[0],
+                             eval_file[0])
+            else:
+                # does the main computations/plottings here
+                self.compute(idx, dev_score, dev_file, eval_score, eval_file)
         # setup final configuration, plotting properties, ...
         self.end_process()
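After this change, `compute` receives plain values when a system has a single
score type (`min_arg == 1`) and parallel lists otherwise. A hypothetical
subclass illustrating the two shapes (for illustration only, not part of this
commit):

    class CountScores(MeasureBase):
        def init_process(self):
            pass

        def compute(self, idx, dev_score, dev_file=None,
                    eval_score=None, eval_file=None):
            # normalize both accepted shapes to a list
            scores = dev_score if isinstance(dev_score, list) else [dev_score]
            click.echo('system %d: %d dev score set(s)' % (idx, len(scores)))

        def end_process(self):
            pass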
@@ -107,7 +130,7 @@ class MeasureBase(object):
     # Main computations are done here in the subclasses
     @abstractmethod
     def compute(self, idx, dev_score, dev_file=None,
-                test_score=None, test_file=None):
+                eval_score=None, eval_file=None):
         """Compute metrics or plots from the given scores provided by
         :py:func:`~bob.measure.script.figure.MeasureBase.run`.
         Should be reimplemented in derived classes
@@ -125,13 +148,13 @@ class MeasureBase(object):
             a :any:`list` of tuples of :py:class:`numpy.ndarray` (e.g. cmc)
         dev_file : str
             name of the dev file without extension
-        test_score:
-            Test scores. Can be a tuple (neg, pos) of
+        eval_score:
+            Eval scores. Can be a tuple (neg, pos) of
             :py:class:`numpy.ndarray` (e.g.
             :py:func:`~bob.measure.script.figure.Roc.compute`) or
             a :any:`list` of tuples of :py:class:`numpy.ndarray` (e.g. cmc)
-        test_file : str
-            name of the test file without extension
+        eval_file : str
+            name of the eval file without extension
         """
         pass
@@ -151,14 +174,12 @@ class MeasureBase(object):
         Returns
         -------
-        :any:`list`: A list (of list) of tuples, where each tuple contains the
-        ``negative`` and ``positive`` scores for one probe of the database. Both
-        ``negatives`` and ``positives`` can be either an 1D'''
+        dev_scores: :any:`list`: A list that contains, for each required
+            dev score file, the output of ``func_load``
+        eval_scores: :any:`list`: A list that contains, for each required
+            eval score file, the output of ``func_load``
+        '''
-        dev_paths = self._scores if 'dev-scores' not in self._ctx.meta else \
-            self._ctx.meta['dev-scores']
-        test_paths = None if 'test-scores' not in self._ctx.meta else \
-            self._ctx.meta['test-scores']
         def _extract_file_names(filenames):
             if filenames is None:
                 return None
@@ -167,25 +188,40 @@ class MeasureBase(object):
                 _, name = ntpath.split(file_path)
                 res.append(name.split(".")[0])
             return res
-        return (_extract_file_names(dev_paths), _extract_file_names(test_paths),
-                self.func_load(dev_paths), self.func_load(test_paths))
-
-    def _process_scores(self, dev_score, test_score):
-        '''Process score files and return neg/pos/fta for test and dev'''
-        dev_neg = dev_pos = dev_fta = test_neg = test_pos = test_fta = None
+        dev_scores = []
+        eval_scores = []
+        dev_files = []
+        eval_files = []
+        for arg in range(self._min_arg):
+            key = 'dev_scores_%d' % arg
+            dev_paths = self._scores if key not in self._ctx.meta else \
+                self._ctx.meta[key]
+            key = 'eval_scores_%d' % arg
+            eval_paths = None if key not in self._ctx.meta else \
+                self._ctx.meta[key]
+            dev_files.append(_extract_file_names(dev_paths))
+            eval_files.append(_extract_file_names(eval_paths))
+            dev_scores.append(self.func_load(dev_paths))
+            eval_scores.append(self.func_load(eval_paths))
+        return (dev_files, eval_files, dev_scores, eval_scores)
+
+    def _process_scores(self, dev_score, eval_score):
+        '''Process score files and return neg/pos/fta for eval and dev'''
+        dev_neg = dev_pos = dev_fta = eval_neg = eval_pos = eval_fta = None
         if dev_score[0] is not None:
             dev_score, dev_fta = utils.get_fta(dev_score)
             dev_neg, dev_pos = dev_score
             if dev_neg is None:
                 raise click.UsageError("While loading dev-score file")
-        if self._test and test_score is not None and test_score[0] is not None:
-            test_score, test_fta = utils.get_fta(test_score)
-            test_neg, test_pos = test_score
-            if test_neg is None:
-                raise click.UsageError("While loading test-score file")
+        if self._eval and eval_score is not None and eval_score[0] is not None:
+            eval_score, eval_fta = utils.get_fta(eval_score)
+            eval_neg, eval_pos = eval_score
+            if eval_neg is None:
+                raise click.UsageError("While loading eval-score file")
-        return (dev_neg, dev_pos, dev_fta, test_neg, test_pos, test_fta)
+        return (dev_neg, dev_pos, dev_fta, eval_neg, eval_pos, eval_fta)

 class Metrics(MeasureBase):
@@ -217,8 +253,8 @@ class Metrics(MeasureBase):
         output stream
     '''
-    def __init__(self, ctx, scores, test, func_load):
-        super(Metrics, self).__init__(ctx, scores, test, func_load)
+    def __init__(self, ctx, scores, evaluation, func_load):
+        super(Metrics, self).__init__(ctx, scores, evaluation, func_load)
         self._tablefmt = None if 'tablefmt' not in ctx.meta else \
             ctx.meta['tablefmt']
         self._criter = None if 'criter' not in ctx.meta else ctx.meta['criter']
@@ -241,24 +277,25 @@ class Metrics(MeasureBase):
             self.log_file = open(self._log, self._open_mode)

     def compute(self, idx, dev_score, dev_file=None,
-                test_score=None, test_file=None):
+                eval_score=None, eval_file=None):
         '''Compute metrics thresholds and tables (FAR, FMR, FNMR, HTER) for
         given system inputs'''
-        dev_neg, dev_pos, dev_fta, test_neg, test_pos, test_fta = \
-            self._process_scores(dev_score, test_score)
+        dev_neg, dev_pos, dev_fta, eval_neg, eval_pos, eval_fta = \
+            self._process_scores(dev_score, eval_score)
         threshold = utils.get_thres(self._criter, dev_neg, dev_pos, self._far) \
             if self._thres is None else self._thres[idx]
+        title = self._titles[idx] if self._titles is not None else None
         if self._thres is None:
             far_str = ''
             if self._criter == 'far' and self._far is not None:
                 far_str = str(self._far)
             click.echo("[Min. criterion: %s %s] Threshold on Development set `%s`: %e" \
                        % (self._criter.upper(), far_str, dev_file, threshold),