diff --git a/bob/bio/base/script/collect_results.py b/bob/bio/base/script/collect_results.py
index 1aacf2578c99cb5e9119da24de9ac4acadd8d8ca..3b6182a2d7aa873905eb247a917f6b83c9534d7e 100644
--- a/bob/bio/base/script/collect_results.py
+++ b/bob/bio/base/script/collect_results.py
@@ -27,6 +27,7 @@ The measure type of the development set can be changed to compute "HTER" or
 
 import sys, os, glob
 import argparse
+import numpy
 
 import bob.measure
 import bob.core
@@ -42,16 +43,18 @@ def command_line_arguments(command_line_parameters):
 
   parser.add_argument('-d', '--devel-name', dest="dev", default="scores-dev", help = "Name of the file containing the development scores")
   parser.add_argument('-e', '--eval-name', dest="eval", default="scores-eval", help = "Name of the file containing the evaluation scores")
   parser.add_argument('-D', '--directory', default=".", help = "The directory where the results should be collected from; might include search patterns as '*'.")
+  parser.add_argument('-r', '--rank', type=int, default=1, help = "The rank for which to compute RR and DIR")
+  parser.add_argument('-f', '--far-threshold', type=float, default=0.001, help = "The FAR threshold to be used with criterion FAR and DIR")
+
   parser.add_argument('-n', '--nonorm-dir', dest="nonorm", default="nonorm", help = "Directory where the unnormalized scores are found")
   parser.add_argument('-z', '--ztnorm-dir', dest="ztnorm", default = "ztnorm", help = "Directory where the normalized scores are found")
   parser.add_argument('-s', '--sort', action='store_true', help = "Sort the results")
   parser.add_argument('-k', '--sort-key', dest='key', default = 'nonorm-dev', choices= ('nonorm-dev','nonorm-eval','ztnorm-dev','ztnorm-eval','dir'), help = "Sort the results according to the given key")
-  parser.add_argument('-c', '--criterion', dest='criterion', default = 'EER', choices = ('EER', 'HTER', 'FAR'), help = "Minimize the threshold on the development set according to the given criterion")
+  parser.add_argument('-c', '--criterion', dest='criterion', default = 'EER', choices = ('EER', 'HTER', 'FAR', 'RR', 'DIR'), help = "Minimize the threshold on the development set according to the given criterion")
   parser.add_argument('-o', '--output', help = "Name of the output file that will contain the EER/HTER scores")
-
-  parser.add_argument('--parser', default = '4column', choices = ('4column', '5column'), help="The style of the resulting score files; rarely changed")
   parser.add_argument('--self-test', action='store_true', help=argparse.SUPPRESS)
 
@@ -62,9 +65,6 @@ def command_line_arguments(command_line_parameters):
 
   bob.core.log.set_verbosity_level(logger, args.verbose)
 
-  # assign the score file parser
-  args.parser = {'4column' : bob.measure.load.split_four_column, '5column' : bob.measure.load.split_five_column}[args.parser]
-
   return args
 
 class Result:
@@ -79,32 +79,65 @@ class Result:
 
   def _calculate(self, dev_file, eval_file = None):
     """Calculates the EER and HTER or FRR based on the threshold criterion."""
-    dev_neg, dev_pos = self.m_args.parser(dev_file)
-
-    # switch which threshold function to use;
-    # THIS f***ing piece of code really is what python authors propose:
-    threshold = {
-      'EER' : bob.measure.eer_threshold,
-      'HTER' : bob.measure.min_hter_threshold,
-      'FAR' : bob.measure.far_threshold
-    } [self.m_args.criterion](dev_neg, dev_pos)
-
-    # compute far and frr for the given threshold
-    dev_far, dev_frr = bob.measure.farfrr(dev_neg, dev_pos, threshold)
-    dev_hter = (dev_far + dev_frr)/2.0
-
-    if eval_file:
-      eval_neg, eval_pos = self.m_args.parser(eval_file)
-      eval_far, eval_frr = bob.measure.farfrr(eval_neg, eval_pos, threshold)
-      eval_hter = (eval_far + eval_frr)/2.0
-    else:
-      eval_hter = None
-      eval_frr = None
+    if self.m_args.criterion in ("RR", "DIR"):
+      scores_dev = bob.measure.load.cmc(dev_file)
+      if eval_file is not None:
+        scores_eval = bob.measure.load.cmc(eval_file)
+
+      if self.m_args.criterion == "DIR":
+        # get the maximum score of each probe that has only negative scores (skip empty score sets)
+        negatives = [max(neg) for neg, pos in scores_dev if neg is not None and len(neg) and (pos is None or not numpy.array(pos).size)]
+        if not negatives:
+          raise ValueError("There need to be at least one pair with only negative scores")
+        threshold = bob.measure.far_threshold(negatives, [], self.m_args.far_threshold)
+        DIR_dev = bob.measure.detection_identification_rate(scores_dev, threshold, self.m_args.rank)
+        if eval_file is not None:
+          # re-compute the threshold for eval file
+          negatives = [max(neg) for neg, pos in scores_eval if neg is not None and len(neg) and (pos is None or not numpy.array(pos).size)]
+          if not negatives:
+            raise ValueError("There need to be at least one pair with only negative scores")
+          threshold = bob.measure.far_threshold(negatives, [], self.m_args.far_threshold)
+          DIR_eval = bob.measure.detection_identification_rate(scores_eval, threshold, self.m_args.rank)
+        else:
+          DIR_eval = None
+        return (DIR_dev, DIR_eval)
+
+      else:
+        # Recognition Rate
+        RR_dev = bob.measure.recognition_rate(scores_dev)
+        RR_eval = None if eval_file is None else bob.measure.recognition_rate(scores_eval)
+        return (RR_dev, RR_eval)
 
-    if self.m_args.criterion == 'FAR':
-      return (dev_frr, eval_frr)
     else:
-      return (dev_hter, eval_hter)
+
+      dev_neg, dev_pos = bob.measure.load.split(dev_file)
+
+      # switch which threshold function to use
+      if self.m_args.criterion == 'EER':
+        threshold = bob.measure.eer_threshold(dev_neg, dev_pos)
+      elif self.m_args.criterion == 'HTER':
+        threshold = bob.measure.min_hter_threshold(dev_neg, dev_pos)
+      elif self.m_args.criterion == 'FAR':
+        threshold = bob.measure.far_threshold(dev_neg, dev_pos, self.m_args.far_threshold)
+      else:
+        raise NotImplementedError("Criterion %s is not yet implemented" % self.m_args.criterion)
+
+      # compute far and frr for the given threshold
+      dev_far, dev_frr = bob.measure.farfrr(dev_neg, dev_pos, threshold)
+      dev_hter = (dev_far + dev_frr)/2.0
+
+      if eval_file:
+        eval_neg, eval_pos = bob.measure.load.split(eval_file)
+        eval_far, eval_frr = bob.measure.farfrr(eval_neg, eval_pos, threshold)
+        eval_hter = (eval_far + eval_frr)/2.0
+      else:
+        eval_hter = None
+        eval_frr = None
+
+      if self.m_args.criterion == 'FAR':
+        return (dev_frr, eval_frr)
+      else:
+        return (dev_hter, eval_hter)
 
   def nonorm(self, dev_file, eval_file = None):
     self.nonorm_dev, self.nonorm_eval = self._calculate(dev_file, eval_file)
@@ -112,10 +145,13 @@ class Result:
   def ztnorm(self, dev_file, eval_file = None):
     self.ztnorm_dev, self.ztnorm_eval = self._calculate(dev_file, eval_file)
 
+  def valid(self):
+    return any(a is not None for a in [self.nonorm_dev, self.ztnorm_dev, self.nonorm_eval, self.ztnorm_eval])
+
   def __str__(self):
     str = ""
     for v in [self.nonorm_dev, self.ztnorm_dev, self.nonorm_eval, self.ztnorm_eval]:
-      if v:
+      if v is not None:
         val = "% 2.3f%%"%(v*100)
       else:
         val = "None"
@@ -125,8 +161,6 @@ class Result:
 
     return str[5:]
 
-results = []
-
 def add_results(args, nonorm, ztnorm = None):
   """Adds results of the given nonorm and ztnorm directories."""
   r = Result(os.path.dirname(nonorm).replace(args.directory+"/", ""), args)
@@ -150,6 +184,7 @@ def add_results(args, nonorm, ztnorm = None):
     else:
       r.ztnorm(dev_file)
 
+  global results
   results.append(r)
 
 
@@ -158,8 +193,8 @@ def recurse(args, path):
   dir_list = os.listdir(path)
 
   # check if the score directories are included in the current path
-  if args.nonorm in dir_list:
-    if args.ztnorm in dir_list:
+  if args.nonorm in dir_list or args.nonorm == '.':
+    if args.ztnorm in dir_list or args.ztnorm == '.':
       add_results(args, os.path.join(path, args.nonorm), os.path.join(path, args.ztnorm))
     else:
       add_results(args, os.path.join(path, args.nonorm))
@@ -175,7 +210,8 @@ def table():
   A = " "*2 + 'dev nonorm'+ " "*5 + 'dev ztnorm' + " "*6 + 'eval nonorm' + " "*4 + 'eval ztnorm' + " "*12 + 'directory\n'
   A += "-"*100+"\n"
   for r in results:
-    A += str(r) + "\n"
+    if r.valid():
+      A += str(r) + "\n"
 
   return A
@@ -183,6 +219,8 @@ def main(command_line_parameters = None):
   """Iterates through the desired directory and collects all result files."""
   args = command_line_arguments(command_line_parameters)
 
+  global results
+  results = []
   # collect results
   directories = glob.glob(args.directory)
   for directory in directories:
diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py
index c7ac7681cfdd3360e04ac24a702bdd2149341f5b..c30d88eeb8b5e1b003262ae1df46453533f931ac 100644
--- a/bob/bio/base/test/test_scripts.py
+++ b/bob/bio/base/test/test_scripts.py
@@ -537,7 +537,7 @@ def test_evaluate_openset():
 
 
 def test_resources():
-  # simply test that the collect_results script works
+  # simply test that the resources script works
  from bob.bio.base.script.resources import resources, databases
   with utils.Quiet():
     resources(['--types', 'database', 'preprocessor', 'extractor', 'algorithm', 'grid', '--details', '--packages', 'bob.bio.base'])
@@ -546,13 +546,39 @@ def test_resources():
 
 
 def test_collect_results():
   # simply test that the collect_results script works
-  test_dir = tempfile.mkdtemp(prefix='bobtest_')
-  try:
-    from bob.bio.base.script.collect_results import main
-    main(['--directory', test_dir, '--sort', '--sort-key', 'dir', '--criterion', 'FAR', '--self-test'])
-  finally:
-    if os.path.exists(test_dir):
-      os.rmdir(test_dir)
+  from bob.bio.base.script.collect_results import main
+  # FAR criterion
+  main([
+    '-D', data_dir,
+    '-d', 'scores-nonorm-dev',
+    '-e', 'scores-nonorm-fivecol-dev',
+    '-n', '.', '-z', '.',
+    '--sort', '--sort-key', 'dir',
+    '--criterion', 'FAR', '--far-threshold', '0.1',
+    '--self-test', '-v'
+  ])
+
+  # Recognition Rate
+  main([
+    '-D', data_dir,
+    '-d', 'scores-nonorm-dev',
+    '-e', 'scores-nonorm-fivecol-dev',
+    '-n', '.', '-z', '.',
+    '--sort', '--sort-key', 'dir',
+    '--criterion', 'RR', '--rank', '10',
+    '--self-test', '-v'
+  ])
+
+  # DIR
+  main([
+    '-D', data_dir,
+    '-d', 'scores-nonorm-openset-dev',
+    '-n', '.', '-z', '.',
+    '--sort', '--sort-key', 'dir',
+    '--criterion', 'DIR', '--far-threshold', '0.1',
+    '--self-test', '-v'
+  ])
+
 
 @utils.grid_available