
Fixing and adding features to the scores generation script

Merged Yannick DAYER requested to merge fix-gen into master
All threads resolved!
Files changed: 2 (+275, -73)
@@ -3,21 +3,17 @@
 import os
 import logging
 import numpy
-import random
 import click
-from click.types import FLOAT
 from bob.extension.scripts.click_helper import verbosity_option
-import bob.core
 from bob.io.base import create_directories_safe
 from bob.measure.script import common_options

 logger = logging.getLogger(__name__)

-NUM_NEG = 5000
-NUM_POS = 5000

-def gen_score_distr(mean_neg, mean_pos, sigma_neg=10, sigma_pos=10):
+def gen_score_distr(
+    mean_neg, mean_pos, sigma_neg=10, sigma_pos=10, n_neg=5000, n_pos=5000, seed=0
+):
     """Generate scores from normal distributions

     Parameters
@@ -30,6 +26,14 @@ def gen_score_distr(mean_neg, mean_pos, sigma_neg=10, sigma_pos=10):
         STDev for negative scores
     sigma_pos : float
         STDev for positive scores
+    n_pos: int
+        The number of positive scores generated
+    n_neg: int
+        The number of negative scores generated
+    seed: int
+        A value to initialize the Random Number generator. Giving the same
+        value (or not specifying 'seed') on two different calls will generate
+        the same lists of scores.

     Returns
     -------
@@ -38,19 +42,28 @@ def gen_score_distr(mean_neg, mean_pos, sigma_neg=10, sigma_pos=10):
     pos_scores : :any:`list`
         Positive scores
     """
-    mt = bob.core.random.mt19937()  # initialise the random number generator
-    neg_generator = bob.core.random.normal(numpy.float32, mean_neg, sigma_neg)
-    pos_generator = bob.core.random.normal(numpy.float32, mean_pos, sigma_pos)
+    logger.debug("Initializing RNG.")
+    numpy.random.seed(seed)

-    neg_scores = [neg_generator(mt) for _ in range(NUM_NEG)]
-    pos_scores = [pos_generator(mt) for _ in range(NUM_NEG)]
+    logger.info(f"Generating {n_neg} negative and {n_pos} positive scores.")
+    neg_scores = numpy.random.normal(loc=mean_neg, scale=sigma_neg, size=n_neg)
+    pos_scores = numpy.random.normal(loc=mean_pos, scale=sigma_pos, size=n_pos)
     return neg_scores, pos_scores


-def write_scores_to_file(neg, pos, filename, n_subjects=5, n_probes_per_subject=5,
-                         n_unknown_subjects=0, neg_unknown=None, five_col=False):
+def write_scores_to_file(
+    neg,
+    pos,
+    filename,
+    n_subjects=5,
+    n_probes_per_subject=5,
+    n_unknown_subjects=0,
+    neg_unknown=None,
+    five_col=False,
+):
     """ Writes score distributions

     Parameters
@@ -61,82 +74,271 @@ def write_scores_to_file(neg, pos, filename, n_subjects=5, n_probes_per_subject=
         Scores for positive samples.
     filename : str
         The path to write the score to.
-    n_sys : int
-        Number of different systems
+    n_subjects: int
+        Number of different subjects
+    n_probes_per_subject: int
+        Number of different samples used as probe for each subject
+    n_unknown_subjects: int
+        The number of unknown (no registered model) subjects
+    neg_unknown: None or list
+        The scores of the unknown subjects
     five_col : bool
         If 5-column format, else 4-column
     """
logger.debug(f"Creating result directories ('{filename}').")
create_directories_safe(os.path.dirname(filename))
create_directories_safe(os.path.dirname(filename))
s_subjects = ['x%d' % i for i in range(n_subjects)]
s_subjects = ["x%d" % i for i in range(n_subjects)]
with open(filename, 'wt') as f:
logger.debug("Writing scores to files.")
for i in pos:
s_name = random.choice(s_subjects)
with open(filename, "wt") as f:
s_five = ' ' if not five_col else ' d' + \
# Generate one line per probe (unless "--force-count" specified)
random.choice(s_subjects) + ' '
logger.debug("Writing positive scores.")
probe_id = "%s_%d" %(s_name, random.randint(0, n_probes_per_subject-1))
for i, score in enumerate(pos):
f.write('%s%s%s %s %f\n' % (s_name, s_five, s_name, probe_id, i))
s_name = s_subjects[int(i / n_probes_per_subject) % n_subjects]
for i in neg:
s_five = " " if not five_col else " d" + s_name + " "
s_names = random.sample(s_subjects, 2)
probe_id = "%s_%d" % (s_name, i % n_probes_per_subject)
s_five = ' ' if not five_col else ' d' + \
f.write("%s%s%s %s %f\n" % (s_name, s_five, s_name, probe_id, score))
random.choice(s_names) + ' '
probe_id = "%s_%d" %(s_names[1], random.randint(0, n_probes_per_subject-1))
f.write('%s%s%s %s %f\n' % (s_names[0], s_five, s_names[1], probe_id, i))
 
# Generate one line per probe against each ref (unless "--force-count" specified)
 
logger.debug("Writing negative scores.")
 
for i, score in enumerate(neg):
 
n_impostors = n_subjects - 1
 
ref = s_subjects[int(i / n_probes_per_subject / n_impostors) % n_subjects]
 
impostors = [s for s in s_subjects if s != ref] # ignore pos
 
probe = impostors[int(i / n_probes_per_subject) % n_impostors]
 
s_five = " " if not five_col else " d" + ref
 
probe_id = "%s_%d" % (probe, i % n_probes_per_subject)
 
f.write("%s%s%s %s %f\n" % (ref, s_five, probe, probe_id, score))
 
 
logger.debug("Writing unknown scores.")
         if neg_unknown is not None:
-            s_unknown_subjects = ['u%d' % i for i in range(n_unknown_subjects)]
-            for i in neg_unknown:
-                s_name = random.choice(s_subjects)
-                s_name_probe = random.choice(s_unknown_subjects)
-                s_five = ' ' if not five_col else ' d' + \
-                    random.choice(s_subjects) + ' '
-                probe_id = "%s_%d" %(s_name_probe, random.randint(0, n_probes_per_subject-1))
-                f.write('%s%s%s %s %f\n' % (s_name, s_five, s_name_probe, probe_id, i))
+            s_unknown_subjects = ["u%d" % i for i in range(n_unknown_subjects)]
+            for i, score in enumerate(neg_unknown):
+                ref = s_subjects[
+                    int(i / n_probes_per_subject / n_unknown_subjects) % n_subjects
+                ]
+                probe = s_unknown_subjects[
+                    int(i / n_probes_per_subject) % n_unknown_subjects
+                ]
+                s_five = " " if not five_col else " d" + ref + " "
+                probe_id = "%s_%d" % (probe, i % n_probes_per_subject)
+                f.write("%s%s%s %s %f\n" % (ref, s_five, probe, probe_id, score))


-@click.command()
-@click.argument('outdir')
-@click.option('-mm', '--mean-match', default=10, type=FLOAT, show_default=True,\
-              help="Mean for the positive scores distribution")
-@click.option('-mnm', '--mean-non-match', default=-10, type=FLOAT, show_default=True,\
-              help="Mean for the negative scores distribution")
-@click.option('-p', '--n-probes-per-subjects', default=5, type=click.INT, show_default=True,\
-              help="Number of probes per subject")
-@click.option('-s', '--n-subjects', default=5, type=click.INT, show_default=True,\
-              help="Number of subjects")
-@click.option('-p', '--sigma-positive', default=10, type=click.FLOAT, show_default=True,\
-              help="Variance for the positive score distributions")
-@click.option('-n', '--sigma-negative', default=10, type=click.FLOAT, show_default=True,\
-              help="Variance for the negative score distributions")
-@click.option('-u', '--n-unknown-subjects', default=0, type=click.INT, show_default=True,\
-              help="Number of unknown subjects (useful for openset plots)")
-@click.option('--five-col/--four-col', default=False, show_default=True)
+@click.command(
+    epilog="""
+Scores generation examples:
+
+Output 'scores-dev' and 'scores-eval' in a new folder 'generated_scores/':
+
+$ bob bio gen ./generated_scores
+
+Output scores similar to a system evaluated on the AT&T dataset dev group:
+
+$ bob bio gen -s 20 -p 5 ./generated_scores
+
+Output a given number of scores in each file:
+
+$ bob bio gen -f --n-neg 500 --n-pos 100 ./generated_scores
+
+Include unknown subjects scores:
+
+$ bob bio gen -s 5 -u 2 ./generated_scores
+
+Change the mean and standard deviation of the scores distributions:
+
+$ bob bio gen -mm 1 -sp 0.3 -mnm -1 -sn 0.5 ./generated_scores
+
+You can observe the distributions histograms in a pdf file with:
+
+$ bob bio hist -e ./generated_scores/scores-{dev,eval} -o hist_gen.pdf
+"""
+)
+@click.argument("outdir")
+@click.option(
+    "-mm",
+    "--mean-match",
+    default=10,
+    type=click.FLOAT,
+    show_default=True,
+    help="Mean for the positive scores distribution",
+)
+@click.option(
+    "-mnm",
+    "--mean-non-match",
+    default=-10,
+    type=click.FLOAT,
+    show_default=True,
+    help="Mean for the negative scores distribution",
+)
+@click.option(
+    "-p",
+    "--n-probes-per-subject",
+    default=5,
+    type=click.INT,
+    show_default=True,
+    help="Number of probes per subject",
+)
+@click.option(
+    "-s",
+    "--n-subjects",
+    default=50,
+    type=click.INT,
+    show_default=True,
+    help="Number of subjects",
+)
+@click.option(
+    "-sp",
+    "--sigma-positive",
+    default=10,
+    type=click.FLOAT,
+    show_default=True,
+    help="Variance for the positive score distributions",
+)
+@click.option(
+    "-sn",
+    "--sigma-negative",
+    default=10,
+    type=click.FLOAT,
+    show_default=True,
+    help="Variance for the negative score distributions",
+)
+@click.option(
+    "-u",
+    "--n-unknown-subjects",
+    default=0,
+    type=click.INT,
+    show_default=True,
+    help="Number of unknown subjects (useful for openset plots)",
+)
+@click.option(
+    "-f",
+    "--force-count",
+    "force_count",
+    is_flag=True,
+    help="Use --n-pos and --n-neg amounts instead of the subject and sample counts",
+)
+@click.option(
+    "--n-pos",
+    "n_pos",
+    default=5000,
+    type=click.INT,
+    show_default=True,
+    help="Number of Positive verifications (number of lines in the file)",
+)
+@click.option(
+    "--n-neg",
+    "n_neg",
+    default=5000,
+    type=click.INT,
+    show_default=True,
+    help="Number of Negative verifications (number of lines in the file)",
+)
+@click.option(
+    "--n-unk",
+    "n_unk",
+    default=5000,
+    type=click.INT,
+    show_default=True,
+    help="Number of Unknown verifications (number of lines in the file)",
+)
+@click.option("--five-col/--four-col", default=False, show_default=True)
 @verbosity_option()
-def gen(outdir, mean_match, mean_non_match, n_probes_per_subjects, n_subjects,\
-        sigma_positive, sigma_negative, n_unknown_subjects, five_col, **kwargs):
+def gen(
+    outdir,
+    mean_match,
+    mean_non_match,
+    n_probes_per_subject,
+    n_subjects,
+    sigma_positive,
+    sigma_negative,
+    n_unknown_subjects,
+    five_col,
+    force_count,
+    n_pos,
+    n_neg,
+    n_unk,
+    **kwargs,
+):
     """Generate random scores.

     Generates random scores in 4col or 5col format. The scores are generated
-    using Gaussian distribution whose mean is an input
+    using Gaussian distribution whose mean and variance are an input
     parameter. The generated scores can be used as hypothetical datasets.
+
+    This command generates scores relative to the number of subjects and
+    probes per subject, unless the -f flag is set. In that case, the --n-pos
+    and --n-neg options are used as the number of genuine and impostor
+    comparisons.
     """
+    # Compute the number of verifications needed
+    if force_count:
+        neg_count, pos_count, unknown_count = n_neg, n_pos, n_unk
+    else:
+        # One reference (model), and `n_probes_per_subject` probes per subject
+        neg_count = n_subjects * n_probes_per_subject * (n_subjects - 1)
+        pos_count = n_probes_per_subject * n_subjects
+        unknown_count = n_unknown_subjects * n_subjects * n_probes_per_subject
+
     # Generate the data
-    neg_dev, pos_dev = gen_score_distr(mean_non_match, mean_match, sigma_negative, sigma_positive)
-    neg_eval, pos_eval = gen_score_distr(mean_non_match, mean_match, sigma_negative, sigma_positive)
+    logger.info("Generating dev scores.")
+    neg_dev, pos_dev = gen_score_distr(
+        mean_non_match,
+        mean_match,
+        sigma_negative,
+        sigma_positive,
+        n_neg=neg_count,
+        n_pos=pos_count,
+        seed=0,
+    )
+    logger.info("Generating eval scores.")
+    neg_eval, pos_eval = gen_score_distr(
+        mean_non_match,
+        mean_match,
+        sigma_negative,
+        sigma_positive,
+        n_neg=neg_count,
+        n_pos=pos_count,
+        seed=1,
+    )
     # For simplicity I will use the same distribution for dev-eval

     if n_unknown_subjects:
-        neg_unknown,_ = gen_score_distr(mean_non_match, mean_match, sigma_negative, sigma_positive)
+        logger.info("Generating unknown scores.")
+        neg_unknown, _ = gen_score_distr(
+            mean_non_match,
+            mean_match,
+            sigma_negative,
+            sigma_positive,
+            n_neg=unknown_count,
+            n_pos=0,
+            seed=2,
+        )
     else:
         neg_unknown = None

     # Write the data into files
-    write_scores_to_file(neg_dev, pos_dev,
-                         os.path.join(outdir, 'scores-dev'),
-                         n_subjects, n_probes_per_subjects,
-                         n_unknown_subjects, neg_unknown, five_col)
-
-    write_scores_to_file(neg_eval, pos_eval,
-                         os.path.join(outdir, 'scores-eval'),
-                         n_subjects, n_probes_per_subjects,
-                         n_unknown_subjects, neg_unknown, five_col)
+    logger.info("Saving results.")
+    write_scores_to_file(
+        neg_dev,
+        pos_dev,
+        os.path.join(outdir, "scores-dev"),
+        n_subjects,
+        n_probes_per_subject,
+        n_unknown_subjects,
+        neg_unknown,
+        five_col,
+    )
+
+    write_scores_to_file(
+        neg_eval,
+        pos_eval,
+        os.path.join(outdir, "scores-eval"),
+        n_subjects,
+        n_probes_per_subject,
+        n_unknown_subjects,
+        neg_unknown,
+        five_col,
+    )
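
A note on the new gen_score_distr: because it now seeds numpy.random, two calls with the same seed return identical score arrays, and the number of generated scores follows the subject/probe counts computed in gen when --force-count is not given. The snippet below is a minimal standalone sketch of that logic in plain numpy (it reimplements the function rather than importing it from bob, and the values of 5 subjects and 5 probes per subject are just example numbers, not the CLI defaults):

    import numpy

    def gen_score_distr(mean_neg, mean_pos, sigma_neg=10, sigma_pos=10,
                        n_neg=5000, n_pos=5000, seed=0):
        # Mirrors the new function in the diff: seeded numpy RNG,
        # one normal distribution per class.
        numpy.random.seed(seed)
        neg = numpy.random.normal(loc=mean_neg, scale=sigma_neg, size=n_neg)
        pos = numpy.random.normal(loc=mean_pos, scale=sigma_pos, size=n_pos)
        return neg, pos

    # Counts used by `gen` when --force-count is not set
    # (example: 5 subjects, 5 probes per subject)
    n_subjects, n_probes_per_subject = 5, 5
    pos_count = n_subjects * n_probes_per_subject                     # 25 genuine comparisons
    neg_count = n_subjects * n_probes_per_subject * (n_subjects - 1)  # 100 impostor comparisons

    neg_a, pos_a = gen_score_distr(-10, 10, n_neg=neg_count, n_pos=pos_count, seed=0)
    neg_b, pos_b = gen_score_distr(-10, 10, n_neg=neg_count, n_pos=pos_count, seed=0)
    assert numpy.array_equal(neg_a, neg_b) and numpy.array_equal(pos_a, pos_b)  # same seed, same scores

Seeds 0, 1 and 2 are what the command uses for the dev, eval and unknown score sets respectively, which is why regenerating the same output folder twice yields identical files.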
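For quick reference, every line written by the new write_scores_to_file follows the "%s%s%s %s %f" pattern visible in the diff: reference label, probe label, probe sample id and score in 4-column mode, with a model label ("d" plus the reference name) inserted as a second column in 5-column mode. The lines below are purely illustrative (the score values are invented); references are named x0, x1, ... and unknown probes u0, u1, ...:

    x0 x0 x0_2 9.871012
    x0 x1 x1_3 -10.204517
    x0 u1 u1_0 -9.332209

The first line is a genuine comparison (a probe of x0 against the reference of x0), the second an impostor comparison (a probe of x1 against x0's reference), and the third an open-set comparison against the unknown probe u1. In 5-column mode the first line would read "x0 dx0 x0 x0_2 9.871012".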