Commit 5d9460a9 authored by Pavel KORSHUNOV

Merge branch 'cross' into 'master'

Cross database testing evaluation
Adds a new command ``bob pad cross``

See merge request !53
parents 5a21e60d 7a8a04ba
@@ -477,6 +477,8 @@ class SVM(Algorithm):
         features_array = feature
+        features_array = features_array.astype('float64')
         if not (self.machine_type == 'ONE_CLASS'):  # two-class SVM case
             probabilities = self.machine.predict_class_and_probabilities(
......
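The cast added above matters because the libsvm-backed machines in this stack operate on double-precision arrays, so features that arrive as float32 (for example, read back from an HDF5 file) must be promoted before prediction. A minimal sketch of the cast, assuming plain numpy input; `machine` below is a hypothetical stand-in for `self.machine`:

import numpy as np

# Hedged sketch: promote float32 features to float64 before handing them
# to the SVM machine, mirroring the one-line change above.
features = np.random.rand(10, 64).astype('float32')
features_array = features.astype('float64')
assert features_array.dtype == np.float64
# probabilities = machine.predict_class_and_probabilities(features_array)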
"""Prints Cross-db metrics analysis
"""
import click
import json
import jinja2
import logging
import math
import os
import yaml
from bob.bio.base.score.load import split
from bob.extension.scripts.click_helper import (
verbosity_option, bool_option, log_parameters)
from bob.measure import eer_threshold, farfrr
from bob.measure.script import common_options
from bob.measure.utils import get_fta
from gridtk.generator import expand
from tabulate import tabulate
logger = logging.getLogger(__name__)
@click.command(epilog='''\b
Examples:
$ bin/bob pad cross 'results/{{ evaluation.database }}/{{ algorithm }}/{{ evaluation.protocol }}/scores/scores-{{ group }}' \
-td replaymobile -d replaymobile -p grandtest -d oulunpu -p Protocol_1 \
-a replaymobile_frame-diff-svm \
-a replaymobile_qm-svm-64 \
-a replaymobile_lbp-svm-64 \
> replaymobile.rst &
''')
@click.argument('score_jinja_template')
@click.option('-d', '--database', 'databases', multiple=True, required=True,
show_default=True,
help='Names of the evaluation databases')
@click.option('-p', '--protocol', 'protocols', multiple=True, required=True,
show_default=True,
help='Names of the protocols of the evaluation databases')
@click.option('-a', '--algorithm', 'algorithms', multiple=True, required=True,
show_default=True,
help='Names of the algorithms')
@click.option('-n', '--names', type=click.File('r'),
help='Name of algorithms to show in the table. Provide a path '
'to a json file maps algorithm names to names that you want to '
'see in the table.')
@click.option('-td', '--train-database', required=True,
help='The database that was used to train the algorithms.')
@click.option('-g', '--group', 'groups', multiple=True, show_default=True,
default=['train', 'dev', 'eval'])
@bool_option('sort', 's', 'whether the table should be sorted.', True)
@common_options.table_option()
@common_options.output_log_metric_option()
@verbosity_option()
@click.pass_context
def cross(ctx, score_jinja_template, databases, protocols, algorithms,
names, train_database, groups, sort, **kwargs):
"""Cross-db analysis metrics
"""
log_parameters(logger)
names = {} if names is None else json.load(names)
env = jinja2.Environment(undefined=jinja2.StrictUndefined)
data = {
'evaluation': [{'database': db, 'protocol': proto}
for db, proto in zip(databases, protocols)],
'algorithm': algorithms,
'group': groups,
}
metrics = {}
for variables in expand(yaml.dump(data, Dumper=yaml.SafeDumper)):
logger.debug(variables)
score_path = env.from_string(score_jinja_template).render(variables)
logger.debug(score_path)
database, protocol, algorithm, group = \
variables['evaluation']['database'], \
variables['evaluation']['protocol'], \
variables['algorithm'], variables['group']
# if algorithm name does not have train_database name in it.
if train_database not in algorithm and database != train_database:
score_path = score_path.replace(
algorithm, database + '_' + algorithm)
if not os.path.exists(score_path):
metrics[(database, protocol, algorithm, group)] = \
(float('nan'), ) * 5
continue
(neg, pos), fta = get_fta(split(score_path))
if group == 'eval':
threshold = metrics[(database, protocol, algorithm, 'dev')][1]
else:
try:
threshold = eer_threshold(neg, pos)
except RuntimeError:
logger.error("Something wrong with {}".format(score_path))
raise
far, frr = farfrr(neg, pos, threshold)
hter = (far + frr) / 2
metrics[(database, protocol, algorithm, group)] = \
(hter, threshold, fta, far, frr)
logger.debug('metrics: %s', metrics)
headers = ["Algorithms"]
for db in databases:
headers += [db + "\nEER_t", "\nEER_d", "\nAPCER", "\nBPCER", "\nACER"]
rows = []
# sort the algorithms based on HTER test, EER dev, EER train
if sort:
train_protocol = protocols[databases.index(train_database)]
def sort_key(alg):
r = []
for grp in ('eval', 'dev', 'train'):
hter = metrics[(train_database, train_protocol, alg, group)][0]
r.append(1 if math.isnan(hter) else hter)
return tuple(r)
algorithms = sorted(algorithms, key=sort_key)
for algorithm in algorithms:
name = algorithm.replace(train_database + '_', '')
name = name.replace(train_protocol + '_', '')
name = names.get(name, name)
rows.append([name])
for database, protocol in zip(databases, protocols):
cell = []
for group in groups:
hter, threshold, fta, far, frr = metrics[(
database, protocol, algorithm, group)]
if group == 'eval':
cell += [far, frr, hter]
else:
cell += [hter]
cell = [round(c * 100, 1) for c in cell]
rows[-1].extend(cell)
title = ' Trained on {} '.format(train_database)
title_line = '\n' + '=' * len(title) + '\n'
click.echo(title_line + title + title_line, file=ctx.meta['log'])
click.echo(tabulate(rows, headers, ctx.meta['tablefmt'], floatfmt=".1f"),
file=ctx.meta['log'])
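The scoring loop above follows the usual cross-database protocol: the decision threshold is fixed at the EER on each development set and then applied unchanged to the matching evaluation set, where FAR and FRR are reported as APCER and BPCER and their average fills the ACER column. A minimal sketch with synthetic scores, using the same bob.measure calls imported above:

import numpy as np
from bob.measure import eer_threshold, farfrr

# synthetic development and evaluation scores (attacks = negative class)
rng = np.random.RandomState(0)
dev_neg, dev_pos = rng.normal(0, 1, 1000), rng.normal(3, 1, 1000)
eval_neg, eval_pos = rng.normal(0.5, 1, 1000), rng.normal(2.5, 1, 1000)

threshold = eer_threshold(dev_neg, dev_pos)       # fixed on dev, as in the loop
far, frr = farfrr(eval_neg, eval_pos, threshold)  # applied unchanged to eval
hter = (far + frr) / 2                            # the ACER column of the table
print(threshold, far, frr, hter)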
"""Finalizes the scores that are produced by spoof.py
"""
import click
import numpy
import logging
from bob.extension.scripts.click_helper import (
verbosity_option, log_parameters)
logger = logging.getLogger(__name__)
@click.command(name='finalize-scores', epilog='''\b
Examples:
$ bin/bob pad finalize_scores /path/to/scores-dev
$ bin/bob pad finalize_scores /path/to/scores-{dev,eval}
''')
@click.argument('scores', type=click.Path(exists=True, dir_okay=False),
nargs=-1)
@click.option('-m', '--method', default='mean',
type=click.Choice(['mean', 'min', 'max']), show_default=True,
help='The method to use when finalizing the scores.')
@verbosity_option()
def finalize_scores(scores, method, **kwargs):
"""Finalizes the scores given by spoof.py
When using bob.pad.base, Algorithms can produce several score values for
each unique sample. You can use this script to average (or min/max) these
scores to have one final score per sample.
The conversion is done in-place. The order of scores will change.
"""
log_parameters(logger)
mean = {'mean': numpy.nanmean, 'max': numpy.nanmax, 'min': numpy.nanmin}[method]
for path in scores:
new_lines = []
with open(path) as f:
old_lines = f.readlines()
old_lines.sort()
for i, line in enumerate(old_lines):
uniq, s = line.strip().rsplit(maxsplit=1)
s = float(s)
if i == 0:
last_line = uniq
last_scores = []
if uniq == last_line:
last_scores.append(s)
else:
new_lines.append('{} {}\n'.format(
last_line, mean(last_scores)))
last_scores = [s]
last_line = uniq
else: # this else is for the for loop
new_lines.append('{} {}\n'.format(last_line, mean(last_scores)))
with open(path, 'w') as f:
f.writelines(new_lines)
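Concretely, the loop above groups sorted score lines by everything before the last column and writes one line per unique sample. A small sketch of the same grouping with hypothetical four-column lines (the exact column layout is whatever spoof.py wrote; only the final score column is parsed):

import numpy
from itertools import groupby

# Hypothetical frame-level scores: two lines for sample-1, one for sample-2.
old_lines = sorted([
    'client1 client1 sample-1 0.2',
    'client1 client1 sample-1 0.4',
    'client2 client2 sample-2 0.9',
])

# Equivalent grouping to the loop above: the key is everything before the
# last (score) column, so scores of the same sample collapse to their mean.
for uniq, grp in groupby(old_lines, key=lambda l: l.rsplit(maxsplit=1)[0]):
    scores = [float(l.rsplit(maxsplit=1)[1]) for l in grp]
    print('{} {}'.format(uniq, numpy.nanmean(scores)))
# -> client1 client1 sample-1 0.3...
# -> client2 client2 sample-2 0.9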
@@ -49,9 +49,9 @@ def convert_and_prepare_features(features):
     if isinstance(
             features[0],
             bob.bio.video.FrameContainer):  # if FrameContainer, convert to a 2D numpy array
-        return convert_list_of_frame_cont_to_array(features)
+        return convert_list_of_frame_cont_to_array(features).astype('float64')
     else:
-        return np.vstack(features)
+        return np.vstack(features).astype('float64')


 def convert_list_of_frame_cont_to_array(frame_containers):
@@ -256,9 +256,9 @@ def mean_std_normalize(features,
     features_mean = np.mean(features, axis=0)
     features_std = np.std(features, axis=0)
+    features_std[features_std == 0.0] = 1.0
     row_norm_list = []
     for row in features:  # row is a sample
......
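The one-line addition above guards against zero variance: a feature dimension that is constant across all samples has a standard deviation of 0, which would turn the normalization into a division by zero. A minimal numpy sketch of the behaviour, not the full helper:

import numpy as np

features = np.array([[1.0, 5.0],
                     [2.0, 5.0],
                     [3.0, 5.0]])  # the second column is constant

mean = np.mean(features, axis=0)
std = np.std(features, axis=0)        # -> [0.816..., 0.0]
std[std == 0.0] = 1.0                 # the added guard; avoids 0/0 -> nan
normalized = (features - mean) / std  # the constant column maps to zeros
print(normalized)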
@@ -147,6 +147,8 @@ setup(
         'epc = bob.pad.base.script.pad_commands:epc',
         'gen = bob.pad.base.script.pad_commands:gen',
         'evaluate = bob.pad.base.script.pad_commands:evaluate',
+        'cross = bob.pad.base.script.cross:cross',
+        'finalize-scores = bob.pad.base.script.finalize_scores:finalize_scores',
     ],
     # bob vuln scripts
......
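With these entry points registered, the two new modules surface as subcommands of the `bob pad` command line, which is what the epilog examples above assume:

  $ bob pad cross --help
  $ bob pad finalize-scores --help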