From ab52cb5a1cfa2d1d2f3b984a373149d30e15677b Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Fri, 25 Jun 2021 12:50:33 +0200
Subject: [PATCH] Updated some reports

---
 bob/bio/demographics/io.py                  |  38 ++
 bob/bio/demographics/plot.py                |  42 ++-
 bob/bio/demographics/reports.py             | 384 ++++++++++++++++++++
 bob/bio/demographics/script/__init__.py     |   0
 bob/bio/demographics/script/commands.py     | 141 +++++++
 bob/bio/demographics/script/demographics.py |  13 +
 experiments/no-processing/casia-africa.py   | 116 ++++++
 experiments/no-processing/meds.py           |  90 +++++
 experiments/no-processing/mobio.py          |  95 +++++
 experiments/no-processing/morph.py          | 125 +++++++
 experiments/score-norm/meds.py              |  93 +++++
 setup.py                                    |  71 ++--
 12 files changed, 1166 insertions(+), 42 deletions(-)
 create mode 100644 bob/bio/demographics/io.py
 create mode 100644 bob/bio/demographics/reports.py
 create mode 100644 bob/bio/demographics/script/__init__.py
 create mode 100644 bob/bio/demographics/script/commands.py
 create mode 100644 bob/bio/demographics/script/demographics.py
 create mode 100644 experiments/no-processing/casia-africa.py
 create mode 100644 experiments/no-processing/meds.py
 create mode 100644 experiments/no-processing/mobio.py
 create mode 100644 experiments/no-processing/morph.py
 create mode 100644 experiments/score-norm/meds.py

diff --git a/bob/bio/demographics/io.py b/bob/bio/demographics/io.py
new file mode 100644
index 0000000..d97eaf0
--- /dev/null
+++ b/bob/bio/demographics/io.py
@@ -0,0 +1,38 @@
+"""
+Loading functions for some databases
+"""
+
+from bob.bio.base.score.load import get_dataframe
+
+
+def load_meds(filename):
+    return get_dataframe(filename)
+
+
+def load_mobio(filename):
+    return get_dataframe(filename)
+
+
+def load_morph(filename, possible_races=["A", "B", "H", "W"], genders_considered=["M"]):
+    """Load MORPH scores filtered by age (> 20), sex and race of both samples.
+
+    Returns the filtered ``(negatives, positives)`` dataframes.
+    """
+    negatives, positives = get_dataframe(filename)
+    negatives = negatives[
+        (negatives.bio_ref_age_phd > 20)
+        & (negatives.probe_age_phd > 20)
+        & (negatives.bio_ref_sex.isin(genders_considered))
+        & (negatives.probe_sex.isin(genders_considered))
+        & (negatives.bio_ref_rac.isin(possible_races))
+        & (negatives.probe_rac.isin(possible_races))
+    ]
+    positives = positives[
+        (positives.bio_ref_age_phd > 20)
+        & (positives.probe_age_phd > 20)
+        & (positives.bio_ref_sex.isin(genders_considered))
+        & (positives.probe_sex.isin(genders_considered))
+        & (positives.bio_ref_rac.isin(possible_races))
+        & (positives.probe_rac.isin(possible_races))
+    ]
+    return negatives, positives
diff --git a/bob/bio/demographics/plot.py b/bob/bio/demographics/plot.py
index 14c5bb8..2dc762b 100644
--- a/bob/bio/demographics/plot.py
+++ b/bob/bio/demographics/plot.py
@@ -125,6 +125,9 @@ def plot_demographic_boxplot(
         return scores
 
     def _plot(scores, axes, labels):
+
+        # This code raises a warning
+        # https://github.com/matplotlib/matplotlib/issues/16353
         bp_negatives = axes.boxplot(
             [scores[s][0] for s in scores],
             patch_artist=True,
@@ -154,7 +157,11 @@ def plot_demographic_boxplot(
     # Plotting the boxplots
     fig, ax = plt.subplots(figsize=(16, 8))
 
-    title = title if percentile is None else title + f" percentile = {percentile}"
+    title = (
+        title
+        if percentile is None
+        else title + f" - boxplot at percentile = {percentile}"
+    )
     fig.suptitle(title)
 
     negatives_dev_as_dict
@@ -162,14 +169,17 @@ def plot_demographic_boxplot(
         axes = plt.subplot(2, 1, 1)
     else:
         axes = plt.subplot(1, 1, 1)
+    plt.title("development set")
 
     def _compute_scores_and_plot(
         negatives_as_dict, positives_as_dict, axes, plot_fmrs=True
     ):
+
         # Computing the scores
         scores = _get_scores(negatives_as_dict, positives_as_dict)
         # Plotting the box plot
         _plot(scores, axes, labels)
+        plt.grid(True)
 
         if plot_fmrs:
             if taus is not None:
@@ -185,8 +195,8 @@ def plot_demographic_boxplot(
     _compute_scores_and_plot(negatives_dev_as_dict, positives_dev_as_dict, axes)
 
     if has_eval():
-        axes = plt.subplot(2, 1, 2)
-
+        axes = plt.subplot(2, 1, 2, sharex=axes)
+        plt.title("evaluation set")
         _compute_scores_and_plot(
             negatives_eval_as_dict, positives_eval_as_dict, axes, plot_fmrs=False
         )
@@ -254,6 +264,9 @@ def plot_fdr(
     assert len(positives) == len(negatives) == len(labels)
     title = "Fairness Discrepancy Rate" if title is None else title
 
+    if taus is None:
+        taus = [compute_fmr_thresholds(n, fmr_thresholds) for n in negatives]
+
     fdrs = [
         fairness_discrepancy_rate(
             neg,
@@ -293,6 +306,7 @@ def plot_fmr_fnmr_tradeoff(
     positives_eval=None,
     label_lookup_table=None,
     title="False Match and False non Match trade-off per demographic",
+    print_fmr_fnmr=False,
 ):
     def has_eval():
         return negatives_eval is not None and positives_eval is not None
@@ -328,9 +342,8 @@ def plot_fmr_fnmr_tradeoff(
             fmrs[key].append(fmr)
             fnmrs[key].append(fnmr)
 
-    # Plottint the FMR and FNMR in two
+    # Plotting the FMR and FNMR in two
     # separated subplots
-
     fig, ax = plt.subplots(figsize=(16, 8))
     fig.suptitle(title)
 
@@ -351,7 +364,24 @@ def plot_fmr_fnmr_tradeoff(
     plt.xlabel("$\\tau=FMR_{10^{-x}}$", fontsize=18)
 
     plt.grid(True)
-
     plt.legend()
 
+    ## Printing
+    if print_fmr_fnmr:
+
+        def print_table(header, fmrs, title):
+            from tabulate import tabulate
+
+            print(title)
+            content = [
+                [l] + [round(value, 3) for value in fmrs[key]]
+                for key, l in zip(fmrs, labels)
+            ]
+
+            print(tabulate([header] + content))
+
+        header = ["Ethnicities"] + fmr_thresholds
+        print_table(header, fmrs, title=f"{title} - FMR")
+        print_table(header, fnmrs, title=f"{title} - FNMR")
+
     return fig
diff --git a/bob/bio/demographics/reports.py b/bob/bio/demographics/reports.py
new file mode 100644
index 0000000..ec2285a
--- /dev/null
+++ b/bob/bio/demographics/reports.py
@@ -0,0 +1,384 @@
+"""
+Standard report API
+"""
+
+from .io import load_meds, load_mobio, load_morph
+from .plot import plot_demographic_boxplot, plot_fmr_fnmr_tradeoff, plot_fdr
+from . import compute_fmr_thresholds
+
+from matplotlib.backends.backend_pdf import PdfPages
+
+
+def load_dev_eval_scores(scores_dev, scores_eval):
+    """Load the negatives/positives of each dev score file and its eval pair.
+
+    `scores_eval` may be None; the eval lists are then filled with None, one
+    entry per dev file. An individual None entry is handled the same way.
+    Returns (negatives_dev, positives_dev, negatives_eval, positives_eval).
+    """
+    if scores_eval is None:
+        scores_eval = [None for _ in scores_dev]
+    negatives_dev, positives_dev = [], []
+    negatives_eval, positives_eval = [], []
+
+    for dev_file, eval_file in zip(scores_dev, scores_eval):
+        dev_negatives, dev_positives = load_meds(dev_file)
+        negatives_dev.append(dev_negatives)
+        positives_dev.append(dev_positives)
+
+        # Without an eval file, keep a None placeholder to preserve alignment
+        eval_negatives, eval_positives = (
+            (None, None) if eval_file is None else load_meds(eval_file)
+        )
+        negatives_eval.append(eval_negatives)
+        positives_eval.append(eval_positives)
+    return negatives_dev, positives_dev, negatives_eval, positives_eval
+
+
+def meds_report(
+    scores_dev,
+    output_filename,
+    scores_eval=None,
+    fmr_thresholds=[10 ** i for i in list(range(-6, 0))],
+    percentile=0.01,
+    titles=None,
+):
+
+    label_lookup_table = {
+        "W__W": "White-White",
+        "W__B": "White-Black",
+        "B__W": "Black-White",
+        "B__B": "Black-Black",
+    }
+
+    variable_suffix = "rac"
+
+    pdf = PdfPages(output_filename)
+
+    negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores(
+        scores_dev, scores_eval
+    )
+
+    if negatives_eval[0] is None:
+
+        # Compute FDR on the same set if there's no evaluation set
+        fig = plot_fdr(
+            negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds
+        )
+    else:
+        # If there is evaluation set
+        # compute the decision thresholds
+
+        taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]
+
+        fig = plot_fdr(
+            negatives_dev,
+            positives_dev,
+            titles,
+            variable_suffix,
+            fmr_thresholds,
+            taus=taus,
+        )
+
+    pdf.savefig(fig)
+
+    for i, (n_dev, p_dev, n_eval, p_eval) in enumerate(
+        zip(negatives_dev, positives_dev, negatives_eval, positives_eval)
+    ):
+
+        title = None if titles is None else titles[i]
+
+        fig = plot_demographic_boxplot(
+            negatives_dev=n_dev,
+            positives_dev=p_dev,
+            variable_suffix=variable_suffix,
+            negatives_eval=n_eval,
+            positives_eval=p_eval,
+            label_lookup_table=label_lookup_table,
+            percentile=percentile,
+            fmr_thresholds=fmr_thresholds,
+            title=title,
+        )
+        pdf.savefig(fig)
+
+        #### PLOTTING THE FMR AND FNMR TRADE OFF
+        fig = plot_fmr_fnmr_tradeoff(
+            n_dev,
+            p_dev,
+            variable_suffix=variable_suffix,
+            fmr_thresholds=fmr_thresholds,
+            negatives_eval=n_eval,
+            positives_eval=p_eval,
+            label_lookup_table=label_lookup_table,
+            print_fmr_fnmr=True,
+            title=title,
+        )
+        pdf.savefig(fig)
+
+    pdf.close()
+
+
+def morph_report(
+    scores_dev,
+    output_filename,
+    scores_eval=None,
+    fmr_thresholds=[10 ** i for i in list(range(-8, 0))],
+    percentile=0.01,
+    titles=None,
+    possible_races=["A", "B", "H", "W"],
+    genders_considered=["M"],
+):
+
+    variables = {
+        "O": "Other",
+        "A": "Asian",
+        "W": "White",
+        "B": "Black",
+        "H": "Hispanic",
+    }
+
+    label_lookup_table = dict()
+    for a in list(variables.keys()):
+        for b in list(variables.keys()):
+            label_lookup_table[f"{a}__{b}"] = f"{variables[a]}__{variables[b]}"
+
+    variable_suffix = "rac"
+
+    pdf = PdfPages(output_filename)
+
+    negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores(
+        scores_dev, scores_eval
+    )
+
+    def filter_out(dataframe):
+        return dataframe[
+            (dataframe.bio_ref_age_phd > 20)
+            & (dataframe.probe_age_phd > 20)
+            & (dataframe.bio_ref_sex.isin(genders_considered))
+            & (dataframe.probe_sex.isin(genders_considered))
+            & (dataframe.bio_ref_rac.isin(possible_races))
+            & (dataframe.probe_rac.isin(possible_races))
+        ]
+
+    negatives_dev = [filter_out(n) for n in negatives_dev]
+    positives_dev = [filter_out(n) for n in positives_dev]
+
+    if negatives_eval[0] is None:
+
+        # Compute FDR on the same set if there's no evaluation set
+        fig = plot_fdr(
+            negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds
+        )
+    else:
+        # If there is evaluation set
+        # compute the decision thresholds
+
+        negatives_eval = [filter_out(n) for n in negatives_eval]
+        positives_eval = [filter_out(n) for n in positives_eval]
+
+        taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]
+
+        fig = plot_fdr(
+            negatives_dev,
+            positives_dev,
+            titles,
+            variable_suffix,
+            fmr_thresholds,
+            taus=taus,
+        )
+
+    pdf.savefig(fig)
+
+    for i, (n_dev, p_dev, n_eval, p_eval) in enumerate(
+        zip(negatives_dev, positives_dev, negatives_eval, positives_eval)
+    ):
+
+        title = None if titles is None else titles[i]
+
+        fig = plot_demographic_boxplot(
+            negatives_dev=n_dev,
+            positives_dev=p_dev,
+            variable_suffix=variable_suffix,
+            negatives_eval=n_eval,
+            positives_eval=p_eval,
+            label_lookup_table=label_lookup_table,
+            percentile=percentile,
+            fmr_thresholds=fmr_thresholds,
+            title=title,
+        )
+        pdf.savefig(fig)
+
+        #### PLOTTING THE FMR AND FNMR TRADE OFF
+        fig = plot_fmr_fnmr_tradeoff(
+            n_dev,
+            p_dev,
+            variable_suffix=variable_suffix,
+            fmr_thresholds=fmr_thresholds,
+            negatives_eval=n_eval,
+            positives_eval=p_eval,
+            print_fmr_fnmr=True,
+            label_lookup_table=label_lookup_table,
+        )
+        pdf.savefig(fig)
+
+    pdf.close()
+
+
+def mobio_report(
+    scores_dev,
+    output_filename,
+    scores_eval=None,
+    fmr_thresholds=[10 ** i for i in list(range(-6, 0))],
+    percentile=0.1,
+    titles=None,
+):
+
+    label_lookup_table = {
+        "m__m": "Male-Male",
+        "m__f": "Male-Female",
+        "f__m": "Female-Male",
+        "f__f": "Female-Female",
+    }
+
+    variable_suffix = "gender"
+
+    pdf = PdfPages(output_filename)
+
+    negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores(
+        scores_dev, scores_eval
+    )
+
+    if negatives_eval[0] is None:
+
+        # Compute FDR on the same set if there's no evaluation set
+        fig = plot_fdr(
+            negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds
+        )
+    else:
+        # If there is evaluation set
+        # compute the decision thresholds
+
+        taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]
+
+        fig = plot_fdr(
+            negatives_dev,
+            positives_dev,
+            titles,
+            variable_suffix,
+            fmr_thresholds,
+            taus=taus,
+        )
+
+    pdf.savefig(fig)
+
+    for i, (n_dev, p_dev, n_eval, p_eval) in enumerate(
+        zip(negatives_dev, positives_dev, negatives_eval, positives_eval)
+    ):
+
+        title = None if titles is None else titles[i]
+
+        fig = plot_demographic_boxplot(
+            negatives_dev=n_dev,
+            positives_dev=p_dev,
+            variable_suffix=variable_suffix,
+            negatives_eval=n_eval,
+            positives_eval=p_eval,
+            label_lookup_table=label_lookup_table,
+            percentile=percentile,
+            fmr_thresholds=fmr_thresholds,
+            title=title,
+        )
+        pdf.savefig(fig)
+
+        #### PLOTTING THE FMR AND FNMR TRADE OFF
+        fig = plot_fmr_fnmr_tradeoff(
+            n_dev,
+            p_dev,
+            variable_suffix=variable_suffix,
+            fmr_thresholds=fmr_thresholds,
+            negatives_eval=n_eval,
+            positives_eval=p_eval,
+            label_lookup_table=label_lookup_table,
+            print_fmr_fnmr=True,
+            title=title,
+        )
+        pdf.savefig(fig)
+
+    pdf.close()
+
+
+def casia_africa_report(
+    scores_dev,
+    output_filename,
+    fmr_thresholds=[10 ** i for i in list(range(-8, 0))],
+    percentile=0.1,
+    titles=None,
+    age_interval=[0, 1000],
+    target_ethnicity=None,
+):
+    variable_suffix = "gender"
+
+    pdf = PdfPages(output_filename)
+
+    negatives_dev, positives_dev, _, _ = load_dev_eval_scores(scores_dev, None)
+
+    def filter_out(dataframe, age_interval, target_ethnicity):
+        """Keep pairs inside `age_interval` and, if given, of `target_ethnicity`."""
+        dataframe = dataframe[
+            dataframe.probe_age.between(age_interval[0], age_interval[1])
+            & dataframe.bio_ref_age.between(age_interval[0], age_interval[1])
+        ]
+        if target_ethnicity is None:
+            return dataframe
+
+        # `&` binds tighter than `==`, so each comparison needs its own
+        # parentheses; otherwise `target_ethnicity & Series` is evaluated first.
+        return dataframe[
+            (dataframe.bio_ref_ethnicity == target_ethnicity)
+            & (dataframe.probe_ethnicity == target_ethnicity)
+        ]
+
+    negatives_dev = [
+        filter_out(n, age_interval, target_ethnicity) for n in negatives_dev
+    ]
+    positives_dev = [
+        filter_out(p, age_interval, target_ethnicity) for p in positives_dev
+    ]
+
+    # Compute FDR on the same set if there's no evaluation set
+    fig = plot_fdr(
+        negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds
+    )
+
+    pdf.savefig(fig)
+
+    for i, (n_dev, p_dev) in enumerate(zip(negatives_dev, positives_dev)):
+
+        title = None if titles is None else titles[i]
+
+        fig = plot_demographic_boxplot(
+            negatives_dev=n_dev,
+            positives_dev=p_dev,
+            variable_suffix=variable_suffix,
+            negatives_eval=None,
+            positives_eval=None,
+            percentile=percentile,
+            fmr_thresholds=fmr_thresholds,
+            title=title,
+        )
+        pdf.savefig(fig)
+
+        #### PLOTTING THE FMR AND FNMR TRADE OFF
+        fig = plot_fmr_fnmr_tradeoff(
+            n_dev,
+            p_dev,
+            variable_suffix=variable_suffix,
+            fmr_thresholds=fmr_thresholds,
+            negatives_eval=None,
+            positives_eval=None,
+            print_fmr_fnmr=True,
+            title=title,
+        )
+        pdf.savefig(fig)
+
+    pdf.close()
diff --git a/bob/bio/demographics/script/__init__.py b/bob/bio/demographics/script/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/bob/bio/demographics/script/commands.py b/bob/bio/demographics/script/commands.py
new file mode 100644
index 0000000..1aa4191
--- /dev/null
+++ b/bob/bio/demographics/script/commands.py
@@ -0,0 +1,141 @@
+from bob.measure.script import common_options
+import numpy as np
+from bob.bio.demographics.reports import (
+    meds_report,
+    morph_report,
+    mobio_report,
+    casia_africa_report,
+)
+import click
+
+
+@click.command()
+@common_options.scores_argument(nargs=-1)
+@common_options.titles_option()
+@common_options.eval_option()
+@common_options.output_plot_file_option(default_out="demographics_meds.pdf")
+@click.option(
+    "--percentile",
+    type=float,
+    help="Percentil of the data used to plot the score distribution box-plots",
+    default=0.1,
+)
+@click.pass_context
+def meds(ctx, scores, evaluation, output, titles, percentile, **kargs):
+    """Standard demographics analysis using the MEDS dataset"""
+
+    scores = np.array(scores, dtype="object")
+
+    if evaluation:
+        scores_dev = scores[[i for i in list(range(len(scores))) if i % 2 == 0]]
+        scores_eval = scores[[i for i in list(range(len(scores))) if i % 2 != 0]]
+    else:
+        scores_dev = scores
+        scores_eval = None
+
+    meds_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles)
+
+    pass
+
+
+@click.command()
+@common_options.scores_argument(nargs=-1)
+@common_options.titles_option()
+@common_options.eval_option()
+@common_options.output_plot_file_option(default_out="demographics_morph.pdf")
+@click.option(
+    "--percentile",
+    type=float,
+    help="Percentil of the data used to plot the score distribution box-plots",
+    default=0.01,
+)
+@click.pass_context
+def morph(ctx, scores, evaluation, output, titles, percentile, **kargs):
+    """Standard demographics analysis using the MORPH dataset"""
+
+    scores = np.array(scores, dtype="object")
+    if evaluation:
+        scores_dev = scores[[i for i in list(range(len(scores))) if i % 2 == 0]]
+        scores_eval = scores[[i for i in list(range(len(scores))) if i % 2 != 0]]
+    else:
+        scores_dev = scores
+        scores_eval = None
+
+    # The percentile of the score distribution comes from --percentile
+    morph_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles)
+
+    pass
+
+
+@click.command()
+@common_options.scores_argument(nargs=-1)
+@common_options.titles_option()
+@common_options.eval_option()
+@common_options.output_plot_file_option(default_out="demographics_mobio.pdf")
+@click.pass_context
+@click.option(
+    "--percentile",
+    type=float,
+    help="Percentil of the data used to plot the score distribution box-plots",
+    default=0.1,
+)
+def mobio(ctx, scores, evaluation, output, titles, percentile, **kargs):
+    """Standard demographics analysis using the MOBIO dataset"""
+
+    scores = np.array(scores, dtype="object")
+    if evaluation:
+        scores_dev = scores[[i for i in list(range(len(scores))) if i % 2 == 0]]
+        scores_eval = scores[[i for i in list(range(len(scores))) if i % 2 != 0]]
+    else:
+        scores_dev = scores
+        scores_eval = None
+
+    # The percentile of the score distribution comes from --percentile
+    mobio_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles)
+
+    pass
+
+
+@click.command()
+@common_options.scores_argument(nargs=-1)
+@common_options.titles_option()
+@common_options.output_plot_file_option(default_out="demographics_casia-africa.pdf")
+@click.option("--min-age", type=int, help="Minimum age", default=0)
+@click.option("--max-age", type=int, help="Maximum age", default=1000)
+@click.option(
+    "--target-demographic",
+    default=None,
+    help="Target demographic ethinicity you want to work with. The Casia-Africa database contains data from 49 ethnical groups. If this variable is not set, all of them will be used",
+)
+@click.option(
+    "--percentile",
+    type=float,
+    help="Percentil of the data used to plot the score distribution box-plots",
+    default=0.1,
+)
+@click.pass_context
+def casia_africa(
+    ctx,
+    scores,
+    output,
+    titles,
+    target_demographic,
+    min_age,
+    max_age,
+    percentile,
+    **kargs
+):
+    """Standard demographics analysis using the CASIA AFRICA dataset"""
+
+    age_interval = (min_age, max_age)
+
+    # The percentile of the score distribution comes from --percentile
+
+    casia_africa_report(
+        scores,
+        output,
+        percentile=percentile,
+        titles=titles,
+        age_interval=age_interval,
+        target_ethnicity=target_demographic,
+    )
diff --git a/bob/bio/demographics/script/demographics.py b/bob/bio/demographics/script/demographics.py
new file mode 100644
index 0000000..4c9ccbd
--- /dev/null
+++ b/bob/bio/demographics/script/demographics.py
@@ -0,0 +1,13 @@
+"""The main entry for bob.bio (click-based) scripts.
+"""
+import click
+import pkg_resources
+from click_plugins import with_plugins
+from bob.extension.scripts.click_helper import AliasedGroup
+
+
+@with_plugins(pkg_resources.iter_entry_points("bob.bio.demographics.cli"))
+@click.group(cls=AliasedGroup)
+def demographics():
+    """Demographics commands"""
+    pass
diff --git a/experiments/no-processing/casia-africa.py b/experiments/no-processing/casia-africa.py
new file mode 100644
index 0000000..285beda
--- /dev/null
+++ b/experiments/no-processing/casia-africa.py
@@ -0,0 +1,116 @@
+# Running a batch of experiments on CASIA-Africa
+
+import os
+
+from bob.extension import rc
+
+## PLEASE SET
+# bob config set bob.bio.demographics.path [OUTPUT-PATH]
+OUTPUT_PATH = (
+    "./results"
+    if rc.get("bob.bio.demographics.path") is None
+    else rc.get("bob.bio.demographics.path")
+)
+# In case the pipeline fails
+# it tries again for `max_attempts` time
+max_attempts = 5
+
+# DATABASE
+from bob.bio.face.database import CasiaAfricaDatabase
+
+protocol = "ID-V-All-Ep1"
+database = CasiaAfricaDatabase(protocol=protocol)
+database_name = "casia-africa"
+
+
+# Baselines
+
+from bob.bio.face.embeddings.tensorflow import (
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+)
+
+from bob.bio.face.embeddings.pytorch import (
+    iresnet100,
+    iresnet50,
+)
+
+from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
+
+from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
+
+baselines = [
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+    iresnet100,
+    iresnet50,
+    arcface_insightFace_lresnet100,
+    vgg16_oxford_baseline,
+]
+
+
+## Running baselines
+
+from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics
+
+for b in baselines:
+    print(f"Running {b.__name__}....")
+
+    # Retry only on failure: a successful attempt leaves the retry loop
+    for i in range(max_attempts):
+        IDIAP = True
+        dask_client = None
+        if IDIAP:
+            from dask.distributed import Client
+
+            from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
+
+            cluster = SGEMultipleQueuesCluster(min_jobs=1)
+            dask_client = Client(cluster)
+
+        ### Running the baseline
+        output_path = os.path.join(
+            OUTPUT_PATH, "no-processing", database_name, b.__name__
+        )
+
+        try:
+            execute_vanilla_biometrics(
+                b(
+                    annotation_type=database.annotation_type,
+                    fixed_positions=database.fixed_positions,
+                ),
+                database,
+                dask_client,
+                groups=["dev"],
+                output=output_path,
+                write_metadata_scores=True,
+                checkpoint=True,
+                dask_partition_size=200,
+                dask_n_workers=80,
+            )
+
+            execute_vanilla_biometrics(
+                b(
+                    annotation_type=database.annotation_type,
+                    fixed_positions=database.fixed_positions,
+                ),
+                database,
+                dask_client,
+                groups=["eval"],
+                output=output_path,
+                write_metadata_scores=True,
+                checkpoint=True,
+                dask_partition_size=200,
+                dask_n_workers=80,
+            )
+        except Exception:
+            print(f"Failed on attempt {i}")
+        else:
+            break
+        finally:
+            if dask_client is not None:
+                dask_client.shutdown()
diff --git a/experiments/no-processing/meds.py b/experiments/no-processing/meds.py
new file mode 100644
index 0000000..5771ab7
--- /dev/null
+++ b/experiments/no-processing/meds.py
@@ -0,0 +1,90 @@
+# Running a batch of experiments on MEDS
+
+import os
+
+from bob.extension import rc
+
+## PLEASE SET
+# bob config set bob.bio.demographics.path [OUTPUT-PATH]
+OUTPUT_PATH = (
+    "./results"
+    if rc.get("bob.bio.demographics.path") is None
+    else rc.get("bob.bio.demographics.path")
+)
+
+
+# DATABASE
+from bob.bio.face.database import MEDSDatabase
+
+protocol = "verification_fold1"
+database = MEDSDatabase(protocol=protocol)
+database_name = "meds"
+
+
+# Baselines
+
+from bob.bio.face.embeddings.tensorflow import (
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+)
+
+from bob.bio.face.embeddings.pytorch import (
+    iresnet100,
+    iresnet50,
+)
+
+from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
+
+from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
+
+
+baselines = [
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+    iresnet100,
+    iresnet50,
+    arcface_insightFace_lresnet100,
+    vgg16_oxford_baseline,
+]
+
+
+## Running baselines
+
+from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics
+
+for b in baselines:
+
+    print(f"Running {b.__name__}....")
+
+    IDIAP = True
+    dask_client = None
+    if IDIAP:
+        from dask.distributed import Client
+
+        from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
+
+        cluster = SGEMultipleQueuesCluster(min_jobs=1)
+        dask_client = Client(cluster)
+
+    ### Running the baseline
+    output_path = os.path.join(OUTPUT_PATH, "no-processing", database_name, b.__name__)
+    execute_vanilla_biometrics(
+        b(
+            annotation_type=database.annotation_type,
+            fixed_positions=database.fixed_positions,
+        ),
+        database,
+        dask_client,
+        groups=["dev", "eval"],
+        output=output_path,
+        write_metadata_scores=True,
+        checkpoint=False,
+        dask_partition_size=200,
+        dask_n_workers=20,
+    )
+    if dask_client is not None:
+        dask_client.shutdown()
diff --git a/experiments/no-processing/mobio.py b/experiments/no-processing/mobio.py
new file mode 100644
index 0000000..b1ff81b
--- /dev/null
+++ b/experiments/no-processing/mobio.py
@@ -0,0 +1,95 @@
+# Running a batch of experiments on MOBIO
+
+import os
+
+from bob.extension import rc
+
+## PLEASE SET
+# bob config set bob.bio.demographics.path [OUTPUT-PATH]
+OUTPUT_PATH = (
+    "./results"
+    if rc.get("bob.bio.demographics.path") is None
+    else rc.get("bob.bio.demographics.path")
+)
+
+
+# DATABASE
+from bob.bio.face.database import MobioDatabase
+
+protocol = "mobile0-male-female"
+database = MobioDatabase(protocol=protocol)
+database_name = "mobio"
+
+
+# Baselines
+
+from bob.bio.face.embeddings.tensorflow import (
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+)
+
+from bob.bio.face.embeddings.pytorch import (
+    iresnet100,
+    iresnet50,
+)
+
+from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
+
+from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
+
+
+baselines = [
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+    iresnet100,
+    iresnet50,
+    arcface_insightFace_lresnet100,
+    vgg16_oxford_baseline,
+]
+# baselines = [
+# resnet50_msceleb_arcface_20210521,
+# ]
+
+
+## Running baselines
+
+from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics
+
+for b in baselines:
+
+    print(f"Running {b.__name__}....")
+
+    IDIAP = True
+    dask_client = None
+    if IDIAP:
+        from dask.distributed import Client
+
+        from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
+
+        cluster = SGEMultipleQueuesCluster(min_jobs=1)
+        dask_client = Client(cluster)
+
+    ### Running the baseline
+    output_path = os.path.join(OUTPUT_PATH, "no-processing", database_name, b.__name__)
+    execute_vanilla_biometrics(
+        b(
+            annotation_type=database.annotation_type,
+            fixed_positions=database.fixed_positions,
+        ),
+        database,
+        dask_client,
+        groups=["dev", "eval"],
+        output=output_path,
+        write_metadata_scores=True,
+        checkpoint=False,
+        dask_partition_size=200,
+        dask_n_workers=20,
+    )
+    if dask_client is not None:
+        dask_client.shutdown()
+
+print("Done!")
diff --git a/experiments/no-processing/morph.py b/experiments/no-processing/morph.py
new file mode 100644
index 0000000..0ed7bc4
--- /dev/null
+++ b/experiments/no-processing/morph.py
@@ -0,0 +1,125 @@
+# Running a batch of experiments on MORPH
+
+import os
+
+from bob.extension import rc
+
+## PLEASE SET
+# bob config set bob.bio.demographics.path [OUTPUT-PATH]
+# Use the configured rc value when it is set; otherwise fall back to a
+# local "./results" directory.
+configured_path = rc.get("bob.bio.demographics.path")
+OUTPUT_PATH = "./results" if configured_path is None else configured_path
+
+# If a pipeline run fails, it is attempted again,
+# up to `max_attempts` attempts in total
+max_attempts = 5
+
+# DATABASE
+from bob.bio.face.database import MorphDatabase
+
+protocol = "verification_fold1"
+database = MorphDatabase(protocol=protocol)
+database_name = "morph"
+
+
+# Baselines
+
+from bob.bio.face.embeddings.tensorflow import (
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+)
+
+from bob.bio.face.embeddings.pytorch import (
+    iresnet100,
+    iresnet50,
+)
+
+from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
+
+from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
+
+"""
+baselines = [
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+    iresnet100,
+    iresnet50,
+    arcface_insightFace_lresnet100,
+    vgg16_oxford_baseline,
+]
+
+"""
+
+
+baselines = [
+    inception_resnet_v2_msceleb_centerloss_2018,
+    resnet50_msceleb_arcface_2021,
+]
+
+
+## Running baselines
+
+from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics
+
+# Run every baseline on the MORPH protocol configured above.
+#
+# Each baseline gets up to `max_attempts` tries: an attempt that raises is
+# reported and retried, and the first attempt that completes ends the retries.
+# NOTE(review): `checkpoint=True` presumably lets a retry reuse what a failed
+# attempt already wrote -- confirm against bob.bio.base.
+
+# Set to False to skip the SGE cluster; `dask_client` then stays None.
+IDIAP = True
+
+for b in baselines:
+
+    print(f"Running {b.__name__}....")
+
+    # Every attempt and both groups of a baseline share one output directory.
+    output_path = os.path.join(OUTPUT_PATH, "no-processing", database_name, b.__name__)
+
+    for i in range(max_attempts):
+
+        # A new cluster and client are created for each attempt.
+        dask_client = None
+        if IDIAP:
+            from dask.distributed import Client
+
+            from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
+
+            cluster = SGEMultipleQueuesCluster(min_jobs=1)
+            dask_client = Client(cluster)
+
+        try:
+            # "dev" and "eval" are processed in two separate pipeline runs.
+            for group in ("dev", "eval"):
+                execute_vanilla_biometrics(
+                    b(
+                        annotation_type=database.annotation_type,
+                        fixed_positions=database.fixed_positions,
+                    ),
+                    database,
+                    dask_client,
+                    groups=[group],
+                    output=output_path,
+                    write_metadata_scores=True,
+                    checkpoint=True,
+                    dask_partition_size=200,
+                    dask_n_workers=80,
+                )
+        except Exception as exc:
+            # Catch `Exception` rather than using a bare `except:` so that
+            # KeyboardInterrupt/SystemExit still stop the script.
+            print(f"Failed on attempt {i}")
+            print(f"  Reason: {exc!r}")
+        else:
+            # Success: do not re-run the remaining attempts.
+            break
+        finally:
+            if dask_client is not None:
+                dask_client.shutdown()
diff --git a/experiments/score-norm/meds.py b/experiments/score-norm/meds.py
new file mode 100644
index 0000000..2fe7648
--- /dev/null
+++ b/experiments/score-norm/meds.py
@@ -0,0 +1,93 @@
+# Running a batch of experiments on MEDS
+
+import os
+
+from bob.extension import rc
+
+## PLEASE SET
+# bob config set bob.bio.demographics.path [OUTPUT-PATH]
+# Use the configured rc value when it is set; otherwise fall back to a
+# local "./results" directory.
+configured_path = rc.get("bob.bio.demographics.path")
+OUTPUT_PATH = "./results" if configured_path is None else configured_path
+
+consider_genuines = False
+ztnorm_cohort_proportion = 1.0
+
+
+# DATABASE
+from bob.bio.face.database import MEDSDatabase
+
+protocol = "verification_fold1"
+database = MEDSDatabase(protocol=protocol)
+database_name = "meds"
+
+
+# Baselines
+
+from bob.bio.face.embeddings.tensorflow import (
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+)
+
+from bob.bio.face.embeddings.pytorch import (
+    iresnet100,
+    iresnet50,
+)
+
+from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
+
+from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
+
+
+baselines = [
+    inception_resnet_v2_msceleb_centerloss_2018,
+    facenet_sanderberg_20170512_110547,
+    resnet50_msceleb_arcface_2021,
+    resnet50_msceleb_arcface_20210521,
+    iresnet100,
+    iresnet50,
+    arcface_insightFace_lresnet100,
+    vgg16_oxford_baseline,
+]
+
+
+## Running baselines
+from bob.bio.base.pipelines.vanilla_biometrics import execute_vanilla_biometrics_ztnorm
+
+# Run each baseline through the ZT-norm pipeline; a new SGE cluster and
+# dask client are created per baseline.
+for b in baselines:
+    print(f"Running {b.__name__}....")
+    IDIAP = True
+    dask_client = None
+    if IDIAP:
+        from dask.distributed import Client
+        from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
+
+        cluster = SGEMultipleQueuesCluster(min_jobs=1)
+        dask_client = Client(cluster)
+
+    output_path = os.path.join(OUTPUT_PATH, "score-norm", database_name, b.__name__)
+    try:
+        execute_vanilla_biometrics_ztnorm(
+            b(
+                annotation_type=database.annotation_type,
+                fixed_positions=database.fixed_positions,
+            ),
+            database,
+            dask_client,
+            groups=["dev", "eval"],
+            output=output_path,
+            write_metadata_scores=True,
+            checkpoint=True,
+            dask_partition_size=100,
+            dask_n_workers=20,
+            consider_genuines=consider_genuines,
+            ztnorm_cohort_proportion=ztnorm_cohort_proportion,
+        )
+    finally:
+        if dask_client is not None:
+            dask_client.shutdown()
diff --git a/setup.py b/setup.py
index 2465625..e92e428 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,8 @@
 # -*- coding: utf-8 -*-
 
 from setuptools import setup, dist
-dist.Distribution(dict(setup_requires=['bob.extension']))
+
+dist.Distribution(dict(setup_requires=["bob.extension"]))
 
 from bob.extension.utils import find_packages
 from bob.extension.utils import load_requirements
@@ -11,51 +12,49 @@ install_requires = load_requirements()
 
 
 setup(
-
-    name='bob.bio.demographics',
+    name="bob.bio.demographics",
     version=open("version.txt").read().rstrip(),
-    description='bob.bio.demographics',
-
-    url='https://gitlab.idiap.ch/bob/bob.bio.demographics',
-    license='BSD',
-
+    description="bob.bio.demographics",
+    url="https://gitlab.idiap.ch/bob/bob.bio.demographics",
+    license="BSD",
     # there may be multiple authors (separate entries by comma)
-    author='Tiago de Freitas Pereira',
-    author_email='tiago.pereira@idiap.ch',
-
+    author="Tiago de Freitas Pereira",
+    author_email="tiago.pereira@idiap.ch",
     # there may be a maintainer apart from the author - you decide
-    #maintainer='?',
-    #maintainer_email='email@example.com',
-
+    # maintainer='?',
+    # maintainer_email='email@example.com',
     # you may add more keywords separating those by commas (a, b, c, ...)
-    keywords = "bob",
-
-    long_description=open('README.rst').read(),
-
+    keywords="bob",
+    long_description=open("README.rst").read(),
     # leave this here, it is pretty standard
     packages=find_packages(),
     include_package_data=True,
-    zip_safe = False,
-
+    zip_safe=False,
     install_requires=install_requires,
-
     entry_points={
-      # add entry points (scripts, bob resources here, if any)
-      },
-
+        "bob.bio.cli": [
+            "demographics          = bob.bio.demographics.script.demographics:demographics",
+        ],
+        "bob.bio.demographics.cli": [
+            "meds          = bob.bio.demographics.script.commands:meds",
+            "morph          = bob.bio.demographics.script.commands:morph",
+            "mobio          = bob.bio.demographics.script.commands:mobio",
+            "casia-africa   = bob.bio.demographics.script.commands:casia_africa ",
+        ],
+    },
     # check classifiers, add and remove as you see fit
     # full list here: https://pypi.org/classifiers/
     # don't remove the Bob framework unless it's not a bob package
-    classifiers = [
-      'Framework :: Bob',
-      'Development Status :: 4 - Beta',
-      'Intended Audience :: Science/Research',
-      'License :: OSI Approved :: BSD License',
-      'Natural Language :: English',
-      'Programming Language :: Python',
-      'Programming Language :: Python :: 3',
-      'Topic :: Scientific/Engineering :: Artificial Intelligence',
-      'Topic :: Software Development :: Libraries :: Python Modules',
-      ],
+    classifiers=[
+        "Framework :: Bob",
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: BSD License",
+        "Natural Language :: English",
+        "Programming Language :: Python",
+        "Programming Language :: Python :: 3",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+    ],
+)
 
-)
\ No newline at end of file
-- 
GitLab