Commit ab52cb5a authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Updated some reports

parent 3782ada4
Pipeline #51808 failed with stages
in 5 minutes and 39 seconds
"""
Loading functions for some databases
"""
from bob.bio.base.score.load import get_dataframe
def load_meds(filename):
    """Load a MEDS score file.

    Thin wrapper that hands ``filename`` to ``get_dataframe`` and returns
    its result untouched.
    """
    scores = get_dataframe(filename)
    return scores
def load_mobio(filename):
    """Load a MOBIO score file.

    Thin wrapper that hands ``filename`` to ``get_dataframe`` and returns
    its result untouched.
    """
    scores = get_dataframe(filename)
    return scores
def load_morph(filename, possible_races=["A", "B", "H", "W"], genders_considered=["M"]):
    """Load a MORPH score file and keep only the selected demographic subset.

    The file is read with ``get_dataframe`` and the two resulting dataframes
    (negatives, positives) are filtered with the same rule. A row is kept
    when, for both the ``bio_ref_*`` and the ``probe_*`` columns:

    * ``age_phd`` is greater than 20,
    * ``sex`` is one of ``genders_considered``,
    * ``rac`` is one of ``possible_races``.

    Parameters
    ----------
    filename:
        Score file handed to ``get_dataframe``.
    possible_races:
        Values accepted in the ``bio_ref_rac`` / ``probe_rac`` columns.
        The default list is only read, never mutated.
    genders_considered:
        Values accepted in the ``bio_ref_sex`` / ``probe_sex`` columns.
        The default list is only read, never mutated.

    Returns
    -------
    tuple
        ``(negatives, positives)`` after filtering.
    """
    # The original passed `get_dataframe` itself here instead of the file name
    negatives, positives = get_dataframe(filename)

    def _select(scores):
        # Same selection rule for negatives and positives
        keep = (
            (scores.bio_ref_age_phd > 20)
            & (scores.probe_age_phd > 20)
            & (scores.bio_ref_sex.isin(genders_considered))
            & (scores.probe_sex.isin(genders_considered))
            & (scores.bio_ref_rac.isin(possible_races))
            & (scores.probe_rac.isin(possible_races))
        )
        return scores[keep]

    return _select(negatives), _select(positives)
......@@ -125,6 +125,9 @@ def plot_demographic_boxplot(
return scores
def _plot(scores, axes, labels):
# This code raises a warning
# https://github.com/matplotlib/matplotlib/issues/16353
bp_negatives = axes.boxplot(
[scores[s][0] for s in scores],
patch_artist=True,
......@@ -154,7 +157,11 @@ def plot_demographic_boxplot(
# Plotting the boxplots
fig, ax = plt.subplots(figsize=(16, 8))
title = title if percentile is None else title + f" percentile = {percentile}"
title = (
title
if percentile is None
else title + f" - boxplot at percentile = {percentile}"
)
fig.suptitle(title)
negatives_dev_as_dict
......@@ -162,14 +169,17 @@ def plot_demographic_boxplot(
axes = plt.subplot(2, 1, 1)
else:
axes = plt.subplot(1, 1, 1)
plt.title("development set")
def _compute_scores_and_plot(
negatives_as_dict, positives_as_dict, axes, plot_fmrs=True
):
# Computing the scores
scores = _get_scores(negatives_as_dict, positives_as_dict)
# Plotting the box plot
_plot(scores, axes, labels)
plt.grid(True)
if plot_fmrs:
if taus is not None:
......@@ -185,8 +195,8 @@ def plot_demographic_boxplot(
_compute_scores_and_plot(negatives_dev_as_dict, positives_dev_as_dict, axes)
if has_eval():
axes = plt.subplot(2, 1, 2)
axes = plt.subplot(2, 1, 2, sharex=axes)
plt.title("evaluation set")
_compute_scores_and_plot(
negatives_eval_as_dict, positives_eval_as_dict, axes, plot_fmrs=False
)
......@@ -254,6 +264,9 @@ def plot_fdr(
assert len(positives) == len(negatives) == len(labels)
title = "Fairness Discrepancy Rate" if title is None else title
if taus is None:
taus = [compute_fmr_thresholds(n, fmr_thresholds) for n in negatives]
fdrs = [
fairness_discrepancy_rate(
neg,
......@@ -293,6 +306,7 @@ def plot_fmr_fnmr_tradeoff(
positives_eval=None,
label_lookup_table=None,
title="False Match and False non Match trade-off per demographic",
print_fmr_fnmr=False,
):
def has_eval():
return negatives_eval is not None and positives_eval is not None
......@@ -328,9 +342,8 @@ def plot_fmr_fnmr_tradeoff(
fmrs[key].append(fmr)
fnmrs[key].append(fnmr)
# Plottint the FMR and FNMR in two
# Plotting the FMR and FNMR in two
# separated subplots
fig, ax = plt.subplots(figsize=(16, 8))
fig.suptitle(title)
......@@ -351,7 +364,24 @@ def plot_fmr_fnmr_tradeoff(
plt.xlabel("$\\tau=FMR_{10^{-x}}$", fontsize=18)
plt.grid(True)
plt.legend()
## Printing
if print_fmr_fnmr:
def print_table(header, fmrs, title):
from tabulate import tabulate
print(title)
content = [
[l] + [round(value, 3) for value in fmrs[key]]
for key, l in zip(fmrs, labels)
]
print(tabulate([header] + content))
header = ["Ethnicities"] + fmr_thresholds
print_table(header, fmrs, title=f"{title} - FMR")
print_table(header, fnmrs, title=f"{title} - FNMR")
return fig
"""
Standard report API
"""
from .io import load_meds, load_mobio, load_morph
from .plot import plot_demographic_boxplot, plot_fmr_fnmr_tradeoff, plot_fdr
from . import compute_fmr_thresholds
from matplotlib.backends.backend_pdf import PdfPages
def load_dev_eval_scores(scores_dev, scores_eval):
    """Load development and (optionally) evaluation score files pairwise.

    Every file is read with ``load_meds``, whose result is unpacked as
    ``(negatives, positives)``. When ``scores_eval`` is ``None``, each
    development file is paired with ``None`` and the evaluation lists are
    filled with ``None`` placeholders.

    Returns
    -------
    tuple of four lists
        ``(negatives_dev, positives_dev, negatives_eval, positives_eval)``,
        one entry per (dev, eval) pair.
    """
    if scores_eval is None:
        scores_eval = [None for _ in scores_dev]

    negatives_dev, positives_dev = [], []
    negatives_eval, positives_eval = [], []

    for dev_file, eval_file in zip(scores_dev, scores_eval):
        dev_neg, dev_pos = load_meds(dev_file)
        negatives_dev.append(dev_neg)
        positives_dev.append(dev_pos)

        # Missing evaluation file -> keep the slot, filled with None
        eval_neg, eval_pos = (
            (None, None) if eval_file is None else load_meds(eval_file)
        )
        negatives_eval.append(eval_neg)
        positives_eval.append(eval_pos)

    return negatives_dev, positives_dev, negatives_eval, positives_eval
def meds_report(
    scores_dev,
    output_filename,
    scores_eval=None,
    fmr_thresholds=[10 ** i for i in range(-6, 0)],
    percentile=0.01,
    titles=None,
):
    """Write the demographic report for MEDS score files into a PDF.

    The PDF contains one fairness-discrepancy-rate figure (``plot_fdr``)
    followed, for every system, by a demographic box-plot
    (``plot_demographic_boxplot``) and an FMR/FNMR trade-off figure
    (``plot_fmr_fnmr_tradeoff``). The demographic variable is the ``rac``
    column suffix.

    Parameters
    ----------
    scores_dev:
        Development-set score files, one per system.
    output_filename:
        Path of the PDF to write.
    scores_eval:
        Optional evaluation-set score files, aligned with ``scores_dev``.
    fmr_thresholds:
        FMR operating points forwarded to the plotting helpers. The default
        list is only read, never mutated.
    percentile:
        Forwarded to ``plot_demographic_boxplot``.
    titles:
        Optional per-system titles; ``titles[i]`` is used for system ``i``
        and the whole sequence is passed to ``plot_fdr``.
    """
    label_lookup_table = {
        "W__W": "White-White",
        "W__B": "White-Black",
        "B__W": "Black-White",
        "B__B": "Black-Black",
    }
    variable_suffix = "rac"

    # Load first so that a loading failure does not leave a PDF handle behind
    negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores(
        scores_dev, scores_eval
    )

    # The context manager closes the PDF even when a plotting call raises
    with PdfPages(output_filename) as pdf:
        if negatives_eval[0] is None:
            # Compute FDR on the same set if there's no evaluation set
            fig = plot_fdr(
                negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds
            )
        else:
            # If there is evaluation set
            # compute the decision thresholds
            # NOTE(review): only the dev scores are handed to plot_fdr here;
            # confirm whether the eval scores were meant to be used.
            taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]
            fig = plot_fdr(
                negatives_dev,
                positives_dev,
                titles,
                variable_suffix,
                fmr_thresholds,
                taus=taus,
            )
        pdf.savefig(fig)

        for i, (n_dev, p_dev, n_eval, p_eval) in enumerate(
            zip(negatives_dev, positives_dev, negatives_eval, positives_eval)
        ):
            title = None if titles is None else titles[i]

            fig = plot_demographic_boxplot(
                negatives_dev=n_dev,
                positives_dev=p_dev,
                variable_suffix=variable_suffix,
                negatives_eval=n_eval,
                positives_eval=p_eval,
                label_lookup_table=label_lookup_table,
                percentile=percentile,
                fmr_thresholds=fmr_thresholds,
                title=title,
            )
            pdf.savefig(fig)

            #### PLOTTING THE FMR AND FNMR TRADE OFF
            fig = plot_fmr_fnmr_tradeoff(
                n_dev,
                p_dev,
                variable_suffix=variable_suffix,
                fmr_thresholds=fmr_thresholds,
                negatives_eval=n_eval,
                positives_eval=p_eval,
                label_lookup_table=label_lookup_table,
                print_fmr_fnmr=True,
                title=title,
            )
            pdf.savefig(fig)
def morph_report(
    scores_dev,
    output_filename,
    scores_eval=None,
    fmr_thresholds=[10 ** i for i in range(-8, 0)],
    percentile=0.01,
    titles=None,
    possible_races=["A", "B", "H", "W"],
    genders_considered=["M"],
):
    """Write the demographic report for MORPH score files into a PDF.

    Scores are loaded with ``load_dev_eval_scores`` and then restricted to
    rows where, for both ``bio_ref_*`` and ``probe_*`` columns,
    ``age_phd > 20``, ``sex`` is in ``genders_considered`` and ``rac`` is in
    ``possible_races``. The PDF contains one ``plot_fdr`` figure followed,
    per system, by a demographic box-plot and an FMR/FNMR trade-off figure.

    Parameters
    ----------
    scores_dev:
        Development-set score files, one per system.
    output_filename:
        Path of the PDF to write.
    scores_eval:
        Optional evaluation-set score files, aligned with ``scores_dev``.
    fmr_thresholds:
        FMR operating points forwarded to the plotting helpers. The default
        list is only read, never mutated.
    percentile:
        Forwarded to ``plot_demographic_boxplot``.
    titles:
        Optional per-system titles; ``titles[i]`` is used for system ``i``
        and the whole sequence is passed to ``plot_fdr``.
    possible_races:
        Accepted values of the ``*_rac`` columns (read-only default).
    genders_considered:
        Accepted values of the ``*_sex`` columns (read-only default).
    """
    variables = {
        "O": "Other",
        "A": "Asian",
        "W": "White",
        "B": "Black",
        "H": "Hispanic",
    }

    # Every ordered pair of race codes gets a readable label
    label_lookup_table = {
        f"{a}__{b}": f"{variables[a]}__{variables[b]}"
        for a in variables
        for b in variables
    }

    variable_suffix = "rac"

    # Load first so that a loading failure does not leave a PDF handle behind
    negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores(
        scores_dev, scores_eval
    )

    def filter_out(dataframe):
        return dataframe[
            (dataframe.bio_ref_age_phd > 20)
            & (dataframe.probe_age_phd > 20)
            & (dataframe.bio_ref_sex.isin(genders_considered))
            & (dataframe.probe_sex.isin(genders_considered))
            & (dataframe.bio_ref_rac.isin(possible_races))
            & (dataframe.probe_rac.isin(possible_races))
        ]

    negatives_dev = [filter_out(n) for n in negatives_dev]
    positives_dev = [filter_out(p) for p in positives_dev]

    # The context manager closes the PDF even when a plotting call raises
    with PdfPages(output_filename) as pdf:
        if negatives_eval[0] is None:
            # Compute FDR on the same set if there's no evaluation set
            fig = plot_fdr(
                negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds
            )
        else:
            # If there is evaluation set
            # compute the decision thresholds
            negatives_eval = [filter_out(n) for n in negatives_eval]
            positives_eval = [filter_out(p) for p in positives_eval]

            # NOTE(review): only the dev scores are handed to plot_fdr here;
            # confirm whether the eval scores were meant to be used.
            taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]
            fig = plot_fdr(
                negatives_dev,
                positives_dev,
                titles,
                variable_suffix,
                fmr_thresholds,
                taus=taus,
            )
        pdf.savefig(fig)

        for i, (n_dev, p_dev, n_eval, p_eval) in enumerate(
            zip(negatives_dev, positives_dev, negatives_eval, positives_eval)
        ):
            title = None if titles is None else titles[i]

            fig = plot_demographic_boxplot(
                negatives_dev=n_dev,
                positives_dev=p_dev,
                variable_suffix=variable_suffix,
                negatives_eval=n_eval,
                positives_eval=p_eval,
                label_lookup_table=label_lookup_table,
                percentile=percentile,
                fmr_thresholds=fmr_thresholds,
                title=title,
            )
            pdf.savefig(fig)

            # Forward the per-system title like the other reports do, but only
            # when one was given so the callee's default title still applies.
            title_kwargs = {} if title is None else {"title": title}

            #### PLOTTING THE FMR AND FNMR TRADE OFF
            fig = plot_fmr_fnmr_tradeoff(
                n_dev,
                p_dev,
                variable_suffix=variable_suffix,
                fmr_thresholds=fmr_thresholds,
                negatives_eval=n_eval,
                positives_eval=p_eval,
                print_fmr_fnmr=True,
                label_lookup_table=label_lookup_table,
                **title_kwargs,
            )
            pdf.savefig(fig)
def mobio_report(
    scores_dev,
    output_filename,
    scores_eval=None,
    fmr_thresholds=[10 ** i for i in range(-6, 0)],
    percentile=0.1,
    titles=None,
):
    """Write the demographic report for MOBIO score files into a PDF.

    The PDF contains one ``plot_fdr`` figure followed, for every system, by
    a demographic box-plot and an FMR/FNMR trade-off figure. The demographic
    variable is the ``gender`` column suffix.

    Parameters
    ----------
    scores_dev:
        Development-set score files, one per system.
    output_filename:
        Path of the PDF to write.
    scores_eval:
        Optional evaluation-set score files, aligned with ``scores_dev``.
    fmr_thresholds:
        FMR operating points forwarded to the plotting helpers. The default
        list is only read, never mutated.
    percentile:
        Forwarded to ``plot_demographic_boxplot``.
    titles:
        Optional per-system titles; ``titles[i]`` is used for system ``i``
        and the whole sequence is passed to ``plot_fdr``.
    """
    label_lookup_table = {
        "m__m": "Male-Male",
        "m__f": "Male-Female",
        "f__m": "Female-Male",
        "f__f": "Female-Female",
    }
    variable_suffix = "gender"

    # Load first so that a loading failure does not leave a PDF handle behind
    negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores(
        scores_dev, scores_eval
    )

    # The context manager closes the PDF even when a plotting call raises
    with PdfPages(output_filename) as pdf:
        if negatives_eval[0] is None:
            # Compute FDR on the same set if there's no evaluation set
            fig = plot_fdr(
                negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds
            )
        else:
            # If there is evaluation set
            # compute the decision thresholds
            # NOTE(review): only the dev scores are handed to plot_fdr here;
            # confirm whether the eval scores were meant to be used.
            taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]
            fig = plot_fdr(
                negatives_dev,
                positives_dev,
                titles,
                variable_suffix,
                fmr_thresholds,
                taus=taus,
            )
        pdf.savefig(fig)

        for i, (n_dev, p_dev, n_eval, p_eval) in enumerate(
            zip(negatives_dev, positives_dev, negatives_eval, positives_eval)
        ):
            title = None if titles is None else titles[i]

            fig = plot_demographic_boxplot(
                negatives_dev=n_dev,
                positives_dev=p_dev,
                variable_suffix=variable_suffix,
                negatives_eval=n_eval,
                positives_eval=p_eval,
                label_lookup_table=label_lookup_table,
                percentile=percentile,
                fmr_thresholds=fmr_thresholds,
                title=title,
            )
            pdf.savefig(fig)

            #### PLOTTING THE FMR AND FNMR TRADE OFF
            fig = plot_fmr_fnmr_tradeoff(
                n_dev,
                p_dev,
                variable_suffix=variable_suffix,
                fmr_thresholds=fmr_thresholds,
                negatives_eval=n_eval,
                positives_eval=p_eval,
                label_lookup_table=label_lookup_table,
                print_fmr_fnmr=True,
                title=title,
            )
            pdf.savefig(fig)
def casia_africa_report(
    scores_dev,
    output_filename,
    fmr_thresholds=[10 ** i for i in range(-8, 0)],
    percentile=0.1,
    titles=None,
    age_interval=[0, 1000],
    target_ethnicity=None,
):
    """Write the demographic report for CASIA-Africa score files into a PDF.

    Only development scores are used. Rows are first restricted to those
    whose ``probe_age`` and ``bio_ref_age`` both lie inside ``age_interval``
    and, when ``target_ethnicity`` is given, to those whose
    ``bio_ref_ethnicity`` and ``probe_ethnicity`` both equal it. The PDF
    contains one ``plot_fdr`` figure followed, per system, by a demographic
    box-plot and an FMR/FNMR trade-off figure. The demographic variable is
    the ``gender`` column suffix.

    Parameters
    ----------
    scores_dev:
        Development-set score files, one per system.
    output_filename:
        Path of the PDF to write.
    fmr_thresholds:
        FMR operating points forwarded to the plotting helpers. The default
        list is only read, never mutated.
    percentile:
        Forwarded to ``plot_demographic_boxplot``.
    titles:
        Optional per-system titles; ``titles[i]`` is used for system ``i``
        and the whole sequence is passed to ``plot_fdr``.
    age_interval:
        Two-element ``[lower, upper]`` bounds handed to ``Series.between``
        (read-only default).
    target_ethnicity:
        When not ``None``, keep only comparisons where both sides have this
        ethnicity value.
    """
    variable_suffix = "gender"

    # Load first so that a loading failure does not leave a PDF handle behind
    negatives_dev, positives_dev, _, _ = load_dev_eval_scores(scores_dev, None)

    def filter_out(dataframe, age_interval, target_ethnicity):
        lower, upper = age_interval[0], age_interval[1]
        dataframe = dataframe[
            dataframe.probe_age.between(lower, upper)
            & dataframe.bio_ref_age.between(lower, upper)
        ]

        if target_ethnicity is None:
            return dataframe

        # Each comparison must be parenthesised: `&` binds tighter than `==`,
        # so the unparenthesised form becomes a chained comparison on Series.
        return dataframe[
            (dataframe.bio_ref_ethnicity == target_ethnicity)
            & (dataframe.probe_ethnicity == target_ethnicity)
        ]

    negatives_dev = [
        filter_out(n, age_interval, target_ethnicity) for n in negatives_dev
    ]
    positives_dev = [
        filter_out(p, age_interval, target_ethnicity) for p in positives_dev
    ]

    # The context manager closes the PDF even when a plotting call raises
    with PdfPages(output_filename) as pdf:
        # There is no evaluation set here, so FDR is computed on the dev set
        fig = plot_fdr(
            negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds
        )
        pdf.savefig(fig)

        for i, (n_dev, p_dev) in enumerate(zip(negatives_dev, positives_dev)):
            title = None if titles is None else titles[i]

            fig = plot_demographic_boxplot(
                negatives_dev=n_dev,
                positives_dev=p_dev,
                variable_suffix=variable_suffix,
                negatives_eval=None,
                positives_eval=None,
                percentile=percentile,
                fmr_thresholds=fmr_thresholds,
                title=title,
            )
            pdf.savefig(fig)

            #### PLOTTING THE FMR AND FNMR TRADE OFF
            fig = plot_fmr_fnmr_tradeoff(
                n_dev,
                p_dev,
                variable_suffix=variable_suffix,
                fmr_thresholds=fmr_thresholds,
                negatives_eval=None,
                positives_eval=None,
                print_fmr_fnmr=True,
                title=title,
            )
            pdf.savefig(fig)
from bob.measure.script import common_options
import numpy as np
from bob.bio.demographics.reports import (
meds_report,
morph_report,
mobio_report,
casia_africa_report,
)
import click
@click.command()
@common_options.scores_argument(nargs=-1)
@common_options.titles_option()
@common_options.eval_option()
@common_options.output_plot_file_option(default_out="demographics_meds.pdf")
@click.option(
    "--percentile",
    type=float,
    help="Percentil of the data used to plot the score distribution box-plots",
    default=0.1,
)
@click.pass_context
def meds(ctx, scores, evaluation, output, titles, percentile, **kargs):
    """Standard demographics analysis using the MEDS dataset"""
    score_files = np.array(scores, dtype="object")

    # With the evaluation flag set, files at even positions are development
    # scores and files at odd positions are evaluation scores.
    scores_dev = score_files[0::2] if evaluation else score_files
    scores_eval = score_files[1::2] if evaluation else None

    meds_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles)
@click.command()
@common_options.scores_argument(nargs=-1)
@common_options.titles_option()
@common_options.eval_option()
@common_options.output_plot_file_option(default_out="demographics_morph.pdf")
@click.option(
    "--percentile",
    type=float,
    help="Percentil of the data used to plot the score distribution box-plots",
    default=0.01,
)
@click.pass_context
def morph(ctx, scores, evaluation, output, titles, percentile, **kargs):
    """Standard demographics analysis using the MORPH dataset"""
    score_files = np.array(scores, dtype="object")

    # With the evaluation flag set, files at even positions are development
    # scores and files at odd positions are evaluation scores.
    scores_dev = score_files[0::2] if evaluation else score_files
    scores_eval = score_files[1::2] if evaluation else None

    # The box-plot percentile comes from the --percentile option
    morph_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles)
@click.command()
@common_options.scores_argument(nargs=-1)
@common_options.titles_option()
@common_options.eval_option()
@common_options.output_plot_file_option(default_out="demographics_mobio.pdf")
@click.pass_context
@click.option(
"--percentile",
type=float,
help="Percentil of the data used to plot the score distribution box-plots",
default=0.1,
)