Skip to content
Snippets Groups Projects
Commit 5814a2de authored by Amir MOHAMMADI's avatar Amir MOHAMMADI
Browse files

Merge branch 'fix-gen' into 'master'

Fixing and adding features to the scores generation script

See merge request !198
parents 213d6069 3c096a7b
No related branches found
No related tags found
1 merge request!198Fixing and adding features to the scores generation script
Pipeline #42833 passed
......@@ -3,21 +3,17 @@
import os
import logging
import numpy
import random
import click
from click.types import FLOAT
from bob.extension.scripts.click_helper import verbosity_option
import bob.core
from bob.io.base import create_directories_safe
from bob.measure.script import common_options
logger = logging.getLogger(__name__)
NUM_NEG = 5000
NUM_POS = 5000
def gen_score_distr(
    mean_neg, mean_pos, sigma_neg=10, sigma_pos=10, n_neg=5000, n_pos=5000, seed=0
):
    """Generate scores from two normal distributions.

    Parameters
    ----------
    mean_neg : float
        Mean for negative scores
    mean_pos : float
        Mean for positive scores
    sigma_neg : float
        STDev for negative scores
    sigma_pos : float
        STDev for positive scores
    n_neg: int
        The number of negative scores generated
    n_pos: int
        The number of positive scores generated
    seed: int
        A value to initialize the Random Number generator. Giving the same
        value (or not specifying 'seed') on two different calls will generate
        the same lists of scores.

    Returns
    -------
    neg_scores : numpy.ndarray
        Negative scores
    pos_scores : numpy.ndarray
        Positive scores
    """
    logger = logging.getLogger(__name__)

    # Seeding makes the output reproducible: same seed => same scores.
    logger.debug("Initializing RNG.")
    numpy.random.seed(seed)

    logger.info(f"Generating {n_neg} negative and {n_pos} positive scores.")

    # Negative scores are drawn first, so the per-seed sequence is stable.
    neg_scores = numpy.random.normal(loc=mean_neg, scale=sigma_neg, size=n_neg)
    pos_scores = numpy.random.normal(loc=mean_pos, scale=sigma_pos, size=n_pos)

    return neg_scores, pos_scores
def write_scores_to_file(
    neg,
    pos,
    filename,
    n_subjects=5,
    n_probes_per_subject=5,
    n_unknown_subjects=0,
    neg_unknown=None,
    five_col=False,
):
    """Writes score distributions to a file in 4 or 5 column format.

    Parameters
    ----------
    neg : array_like
        Scores for negative samples.
    pos : array_like
        Scores for positive samples.
    filename : str
        The path to write the scores to.
    n_subjects: int
        Number of different subjects
    n_probes_per_subject: int
        Number of different samples used as probe for each subject
    n_unknown_subjects: int
        The number of unknown (no registered model) subjects
    neg_unknown: None or list
        The scores of the unknown subjects
    five_col : bool
        If 5-column format, else 4-column
    """
    logger = logging.getLogger(__name__)

    logger.debug(f"Creating result directories ('{filename}').")
    dirname = os.path.dirname(filename)
    if dirname:  # a bare filename has no directory component to create
        os.makedirs(dirname, exist_ok=True)
    s_subjects = ["x%d" % i for i in range(n_subjects)]

    logger.debug("Writing scores to files.")
    with open(filename, "wt") as f:
        # One line per probe of each subject against their own model.
        logger.debug("Writing positive scores.")
        for i, score in enumerate(pos):
            s_name = s_subjects[i // n_probes_per_subject % n_subjects]
            s_five = " " if not five_col else " d" + s_name + " "
            probe_id = "%s_%d" % (s_name, i % n_probes_per_subject)
            f.write("%s%s%s %s %f\n" % (s_name, s_five, s_name, probe_id, score))

        # One line per probe against each non-matching reference model.
        logger.debug("Writing negative scores.")
        for i, score in enumerate(neg):
            n_impostors = n_subjects - 1
            ref = s_subjects[i // n_probes_per_subject // n_impostors % n_subjects]
            impostors = [s for s in s_subjects if s != ref]  # ignore pos
            probe = impostors[i // n_probes_per_subject % n_impostors]
            # BUGFIX: the trailing space was missing here, gluing the 5-col
            # client id to the probe subject id on negative lines.
            s_five = " " if not five_col else " d" + ref + " "
            probe_id = "%s_%d" % (probe, i % n_probes_per_subject)
            f.write("%s%s%s %s %f\n" % (ref, s_five, probe, probe_id, score))

        # Unknown (open-set) probes score against every registered model.
        logger.debug("Writing unknown scores.")
        if neg_unknown is not None:
            s_unknown_subjects = ["u%d" % i for i in range(n_unknown_subjects)]
            for i, score in enumerate(neg_unknown):
                ref = s_subjects[
                    i // n_probes_per_subject // n_unknown_subjects % n_subjects
                ]
                probe = s_unknown_subjects[
                    i // n_probes_per_subject % n_unknown_subjects
                ]
                s_five = " " if not five_col else " d" + ref + " "
                probe_id = "%s_%d" % (probe, i % n_probes_per_subject)
                f.write("%s%s%s %s %f\n" % (ref, s_five, probe, probe_id, score))
@click.command(
    epilog="""
Scores generation examples:

Output 'scores-dev' and 'scores-eval' in a new folder 'generated_scores/':

  $ bob bio gen ./generated_scores

Output scores similar to a system evaluated on the AT&T dataset dev group:

  $ bob bio gen -s 20 -p 5 ./generated_scores

Output a given number of scores in each file:

  $ bob bio gen -f --n-neg 500 --n-pos 100 ./generated_scores

Include unknown subjects scores:

  $ bob bio gen -s 5 -u 2 ./generated_scores

Change the mean and standard deviation of the scores distributions:

  $ bob bio gen -mm 1 -sp 0.3 -mnm -1 -sn 0.5 ./generated_scores

You can observe the distributions histograms in a pdf file with:

  $ bob bio hist -e ./generated_scores/scores-{dev,eval} -o hist_gen.pdf
"""
)
@click.argument("outdir")
@click.option(
    "-mm",
    "--mean-match",
    default=10,
    type=click.FLOAT,
    show_default=True,
    help="Mean for the positive scores distribution",
)
@click.option(
    "-mnm",
    "--mean-non-match",
    default=-10,
    type=click.FLOAT,
    show_default=True,
    help="Mean for the negative scores distribution",
)
@click.option(
    "-p",
    "--n-probes-per-subject",
    default=5,
    type=click.INT,
    show_default=True,
    help="Number of probes per subject",
)
@click.option(
    "-s",
    "--n-subjects",
    default=50,
    type=click.INT,
    show_default=True,
    help="Number of subjects",
)
@click.option(
    "-sp",
    "--sigma-positive",
    default=10,
    type=click.FLOAT,
    show_default=True,
    help="Variance for the positive score distributions",
)
@click.option(
    "-sn",
    "--sigma-negative",
    default=10,
    type=click.FLOAT,
    show_default=True,
    help="Variance for the negative score distributions",
)
@click.option(
    "-u",
    "--n-unknown-subjects",
    default=0,
    type=click.INT,
    show_default=True,
    help="Number of unknown subjects (useful for openset plots)",
)
@click.option(
    "-f",
    "--force-count",
    "force_count",
    is_flag=True,
    help="Use --n-pos and --n-neg amounts instead of the subject and sample counts",
)
@click.option(
    "--n-pos",
    "n_pos",
    default=5000,
    type=click.INT,
    show_default=True,
    help="Number of Positive verifications (number of lines in the file)",
)
@click.option(
    "--n-neg",
    "n_neg",
    default=5000,
    type=click.INT,
    show_default=True,
    help="Number of Negative verifications (number of lines in the file)",
)
@click.option(
    "--n-unk",
    "n_unk",
    default=5000,
    type=click.INT,
    show_default=True,
    help="Number of Unknown verifications (number of lines in the file)",
)
@click.option("--five-col/--four-col", default=False, show_default=True)
@verbosity_option()
def gen(
    outdir,
    mean_match,
    mean_non_match,
    n_probes_per_subject,
    n_subjects,
    sigma_positive,
    sigma_negative,
    n_unknown_subjects,
    five_col,
    force_count,
    n_pos,
    n_neg,
    n_unk,
    **kwargs,
):
    """Generate random scores.

    Generates random scores in 4col or 5col format. The scores are generated
    using Gaussian distribution whose mean and variance are an input
    parameter. The generated scores can be used as hypothetical datasets.

    This command generates scores relative to the number of subjects and
    probes per subjects, unless the -f flag is set. In that case, the --n-pos
    and --n-neg options are used as number of genuine and impostor
    comparisons.
    """
    # Compute the number of verifications needed
    if force_count:
        neg_count, pos_count, unknown_count = n_neg, n_pos, n_unk
    else:
        # One reference (model), and `n_probes_per_subject` probes per subject
        neg_count = n_subjects * n_probes_per_subject * (n_subjects - 1)
        pos_count = n_probes_per_subject * n_subjects
        unknown_count = n_unknown_subjects * n_subjects * n_probes_per_subject

    # Generate the data with distinct seeds so dev and eval scores differ but
    # remain reproducible across runs.
    logger.info("Generating dev scores.")
    neg_dev, pos_dev = gen_score_distr(
        mean_non_match,
        mean_match,
        sigma_negative,
        sigma_positive,
        n_neg=neg_count,
        n_pos=pos_count,
        seed=0,
    )
    logger.info("Generating eval scores.")
    neg_eval, pos_eval = gen_score_distr(
        mean_non_match,
        mean_match,
        sigma_negative,
        sigma_positive,
        n_neg=neg_count,
        n_pos=pos_count,
        seed=1,
    )
    # For simplicity I will use the same distribution for dev-eval
    if n_unknown_subjects:
        logger.info("Generating unknown scores.")
        neg_unknown, _ = gen_score_distr(
            mean_non_match,
            mean_match,
            sigma_negative,
            sigma_positive,
            n_neg=unknown_count,
            n_pos=0,
            seed=2,
        )
    else:
        neg_unknown = None

    # Write the data into files
    logger.info("Saving results.")
    write_scores_to_file(
        neg_dev,
        pos_dev,
        os.path.join(outdir, "scores-dev"),
        n_subjects,
        n_probes_per_subject,
        n_unknown_subjects,
        neg_unknown,
        five_col,
    )
    write_scores_to_file(
        neg_eval,
        pos_eval,
        os.path.join(outdir, "scores-eval"),
        n_subjects,
        n_probes_per_subject,
        n_unknown_subjects,
        neg_unknown,
        five_col,
    )
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Yannick Dayer <yannick.dayer@idiap.ch>
# Mon 14 Sep 2020 17:00:41 UTC+02
"""Tests for the bob.bio.base.script.gen module
The gen module generates synthetic scores and saves them to a file for
demonstration and test purpose.
"""
import os
import numpy
from click.testing import CliRunner
from bob.extension.scripts.click_helper import assert_click_runner_result
from bob.bio.base.script.gen import gen, gen_score_distr
import logging
logger = logging.getLogger(__name__)
logger.setLevel(
"DEBUG"
) # If NOTSET (default), will be changed to ERROR at CliRunner.invoke
def _count_lines(path):
    # Return the number of lines in a text file.
    with open(path) as f:
        return sum(1 for _ in f)


def _assert_score_files(temp_path, expected_lines):
    # Check that both score files exist and hold the expected number of lines.
    for group in ("dev", "eval"):
        score_file = os.path.join(temp_path, f"scores-{group}")
        assert os.path.exists(score_file), f"{group} scores file not created."
        assert _count_lines(score_file) == expected_lines


def test_gen():
    """
    Tests that the main gen command works as expected
    """
    # Define a click runner to invoke click commands
    runner = CliRunner()
    with runner.isolated_filesystem():
        temp_path = "./gen_test_temp_dir/"
        common_args = ["-mm", "10", "-mnm", "-10", "-sp", "1", "-sn", "1"]

        # First case: force the exact number of scores with -f/--force-count.
        n_subjects = 5
        n_probes_per_subject = 5
        n_unknown_subjects = 2
        n_pos, n_neg, n_unk = 10, 60, 20
        logger.info("Calling 'gen' with a specific amount of scores.")
        result = runner.invoke(
            gen,
            args=common_args
            + [
                "-p",
                f"{n_probes_per_subject}",
                "-s",
                f"{n_subjects}",
                "-u",
                f"{n_unknown_subjects}",
                "-f",
                "--n-pos",
                f"{n_pos}",
                "--n-neg",
                f"{n_neg}",
                "--n-unk",
                f"{n_unk}",
                f"{temp_path}",
            ],
        )
        assert_click_runner_result(result)
        _assert_score_files(temp_path, n_pos + n_neg + n_unk)

        # Remaining cases: the counts are derived from the subject/probe
        # parameters: (description, n_subjects, n_probes, n_unknown).
        cases = [
            ("without a specific amount", 5, 5, 2),
            ("without unknown subjects", 5, 2, 0),
            ("with no subjects", 0, 2, 0),
            ("with no probes", 5, 0, 2),
            ("with only unknowns", 5, 0, 2),
        ]
        for label, n_subjects, n_probes_per_subject, n_unknown_subjects in cases:
            n_pos = n_subjects * n_probes_per_subject
            n_neg = n_subjects * (n_subjects - 1) * n_probes_per_subject
            n_unk = n_unknown_subjects * n_subjects * n_probes_per_subject
            logger.info(f"Calling 'gen' {label}.")
            result = runner.invoke(
                gen,
                args=common_args
                + [
                    "-p",
                    f"{n_probes_per_subject}",
                    "-s",
                    f"{n_subjects}",
                    "-u",
                    f"{n_unknown_subjects}",
                    f"{temp_path}",
                ],
            )
            assert_click_runner_result(result)
            _assert_score_files(temp_path, n_pos + n_neg + n_unk)
def test_gen_score_dist():
    """
    Tests that the scores generation works as expected
    """
    neg, pos = gen_score_distr(
        mean_neg=-10, mean_pos=10, sigma_neg=1, sigma_pos=1, n_neg=20, n_pos=20, seed=0
    )

    # Reference values for seed 0 with the parameters above.
    expected_neg = numpy.array(
        [
            -8.23594765,
            -9.59984279,
            -9.02126202,
            -7.7591068,
            -8.13244201,
            -10.97727788,
            -9.04991158,
            -10.15135721,
            -10.10321885,
            -9.5894015,
            -9.85595643,
            -8.54572649,
            -9.23896227,
            -9.87832498,
            -9.55613677,
            -9.66632567,
            -8.50592093,
            -10.20515826,
            -9.6869323,
            -10.85409574,
        ]
    )
    expected_pos = numpy.array(
        [
            7.44701018,
            10.6536186,
            10.8644362,
            9.25783498,
            12.26975462,
            8.54563433,
            10.04575852,
            9.81281615,
            11.53277921,
            11.46935877,
            10.15494743,
            10.37816252,
            9.11221425,
            8.01920353,
            9.65208785,
            10.15634897,
            11.23029068,
            11.20237985,
            9.61267318,
            9.69769725,
        ]
    )

    # Same checks for both score lists: count, element type, exact values.
    for scores, label, expected in (
        (neg, "negative", expected_neg),
        (pos, "positive", expected_pos),
    ):
        assert (
            len(scores) == 20
        ), f"Incorrect number of {label} scores generated ({len(scores)})"
        assert all(
            isinstance(s, (numpy.floating, float)) for s in scores
        ), "A score was not a float"
        assert numpy.allclose(scores, expected), "Unexpected score generated"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment