Commit d13e8270 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Finished demographic plots

parent 0a4b78ba
Pipeline #51741 failed with stages
in 1 minute and 48 seconds
......@@ -4,5 +4,10 @@ from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
from .scores import compute_fmr_thresholds, split_scores_by_variable
from .scores import (
compute_fmr_thresholds,
split_scores_by_variable,
fairness_discrepancy_rate,
compute_fdr,
)
......@@ -2,8 +2,9 @@
Demographic plotting mechanisms
"""
from . import split_scores_by_variable, compute_fmr_thresholds
from . import split_scores_by_variable, compute_fmr_thresholds, compute_fdr
import matplotlib.pyplot as plt
import bob.measure
def plot_demographic_boxplot(
......@@ -44,7 +45,9 @@ def plot_demographic_boxplot(
label_lookup_table: dict
Lookup table mapping `variable` to the actual label of the variable
percentile=None,
percentile: float
If set, it will plit
title="",
......@@ -191,3 +194,164 @@ def plot_demographic_boxplot(
fig.legend()
return fig
def plot_fdr(
    negatives,
    positives,
    labels,
    variable_suffix,
    fmr_thresholds,
    fdr_fn=compute_fdr,
    alpha=0.5,
    beta=0,
    taus=None,
    title=None,
):
    """
    Return the Fairness Discrepancy Rate PLOT from the paper

    de Freitas Pereira, Tiago, and Sébastien Marcel. "Fairness in Biometrics: a figure of merit to assess biometric verification systems." arXiv e-prints (2020): arXiv-2011.

    One FDR curve is drawn per experiment, i.e. per
    (`negatives[i]`, `positives[i]`, `labels[i]`) triplet.

    Parameters
    ----------

      negatives: list of dataframe
        One Pandas Dataframe per experiment containing the negative scores
        (or impostor scores, or even non-mated scores)

      positives: list of dataframe
        One Pandas Dataframe per experiment containing the positive scores
        (or genuines scores, or even mated scores)

      labels: list
        Labels of the experiments (used in the legend)

      variable_suffix: str
        The suffix of a variable that will be appended to `bio_ref_[variable_suffix]` for biometric references
        and `probe_[variable_suffix]` that will be appended to probes.

      fmr_thresholds: list
        List containing the FMR operational points (x axis of the plot)

      fdr_fn:
        Function used to compute the FDR

      alpha: float
        FDR hyperparameter `alpha`

      beta: float
        FDR hyperparameter `beta` (the implementation of NIST admits such parameter)

      taus: list of list
        Decision thresholds, one list per experiment. If `None`, they
        will be computed on the fly for every experiment using `fmr_thresholds`

      title: str
        Figure title. If `None`, "Fairness Discrepancy Rate" is used

    Returns
    -------

      fig:
        The matplotlib figure containing the plot
    """
    from . import fairness_discrepancy_rate

    assert (
        len(positives) == len(negatives) == len(labels)
    ), "`negatives`, `positives` and `labels` must have the same length"

    title = "Fairness Discrepancy Rate" if title is None else title

    # `taus` is zipped with the experiments below; when it is not provided
    # we hand `None` to every experiment so that each one estimates its own
    # decision thresholds (zipping `None` itself would raise a TypeError).
    if taus is None:
        taus = [None] * len(negatives)

    fdrs = [
        fairness_discrepancy_rate(
            neg,
            pos,
            variable_suffix,
            fmr_thresholds,
            fdr_fn=fdr_fn,
            alpha=alpha,
            beta=beta,
            taus=t,
        )
        for neg, pos, t in zip(negatives, positives, taus)
    ]

    fig, ax = plt.subplots(figsize=(16, 8))
    fig.suptitle(title)

    # Curves first, then the markers of the operational points on top
    for fdr, label in zip(fdrs, labels):
        ax.semilogx(fmr_thresholds, fdr, label=label)
    for fdr in fdrs:
        ax.scatter(fmr_thresholds, fdr)

    ax.set_ylabel("$FDR(\\tau)$", fontsize=18)
    ax.set_xlabel("$\\tau=FMR_{10^{-x}}$", fontsize=18)
    ax.grid(True)
    ax.legend()

    return fig
def plot_fmr_fnmr_tradeoff(
    negatives_dev,
    positives_dev,
    variable_suffix,
    fmr_thresholds,
    negatives_eval=None,
    positives_eval=None,
    label_lookup_table=None,
    title="False Match and False non Match trade-off per demographic",
):
    """
    Plot, per demographic group, the FNMR and the FMR obtained at the
    decision thresholds estimated from `fmr_thresholds`.

    The decision thresholds are always computed with `negatives_dev`.
    The error rates are computed with the evaluation scores when both
    `negatives_eval` and `positives_eval` are given, otherwise with the
    development scores.

    Parameters
    ----------

      negatives_dev: dataframe
        Negative scores of the development set

      positives_dev: dataframe
        Positive scores of the development set

      variable_suffix: str
        Suffix of the variable used by `split_scores_by_variable` to
        split the scores into groups

      fmr_thresholds: list
        List containing the FMR operational points (x axis of the plots)

      negatives_eval: dataframe
        Negative scores of the evaluation set (optional)

      positives_eval: dataframe
        Positive scores of the evaluation set (optional)

      label_lookup_table: dict
        If set, maps every group key to the label shown in the legend

      title: str
        Figure title

    Returns
    -------

      fig:
        The matplotlib figure with the FNMR subplot on top and the FMR
        subplot at the bottom
    """

    # Computing decision thresholds if we have any FMR
    taus = compute_fmr_thresholds(negatives_dev, fmr_thresholds)

    # If we have an evaluation set, do the plots with the evaluation set,
    # otherwise uses the development set
    has_eval = negatives_eval is not None and positives_eval is not None
    if has_eval:
        negatives_as_dict, positives_as_dict = split_scores_by_variable(
            negatives_eval, positives_eval, variable_suffix
        )
    else:
        negatives_as_dict, positives_as_dict = split_scores_by_variable(
            negatives_dev, positives_dev, variable_suffix
        )

    # Iterating ONLY on comparisons of the same
    # demographic group
    fmrs = dict()
    fnmrs = dict()
    for key in positives_as_dict:
        # The score arrays do not depend on the threshold: extract them once
        neg_scores = negatives_as_dict[key]["score"].to_numpy()
        pos_scores = positives_as_dict[key]["score"].to_numpy()

        fmrs[key] = []
        fnmrs[key] = []
        for t in taus:
            fmr, fnmr = bob.measure.farfrr(neg_scores, pos_scores, t)
            fmrs[key].append(fmr)
            fnmrs[key].append(fnmr)

    labels = list(positives_as_dict.keys())
    if label_lookup_table is not None:
        labels = [label_lookup_table[l] for l in labels]

    # Plotting the FMR and FNMR in two separated subplots.
    # The 2x1 grid is created in one go: creating a single full-figure axes
    # first and stacking `plt.subplot(2, 1, k)` on top of it leaves (or
    # relies on the deprecated auto-removal of) an overlapping empty axes.
    fig, (ax_fnmr, ax_fmr) = plt.subplots(2, 1, figsize=(16, 8))
    fig.suptitle(title)

    # Plot FNMR
    for key, label in zip(fnmrs, labels):
        ax_fnmr.semilogx(fmr_thresholds, fnmrs[key], label=label)
    ax_fnmr.set_ylabel("$FNMR(\\tau)$", fontsize=18)
    ax_fnmr.grid(True)

    # Plot FMR
    for key, label in zip(fmrs, labels):
        ax_fmr.semilogx(fmr_thresholds, fmrs[key], label=label)
    ax_fmr.set_ylabel("$FMR(\\tau)$", fontsize=18)
    ax_fmr.set_xlabel("$\\tau=FMR_{10^{-x}}$", fontsize=18)
    ax_fmr.grid(True)

    # Both subplots share labels and plotting order, one legend is enough
    ax_fmr.legend()

    return fig
......@@ -111,3 +111,89 @@ def split_scores_by_variable(negatives, positives, variable_suffix):
return negatives_as_dict, positives_as_dict
def compute_fdr(A_tau, B_tau, alpha=0.5):
    """
    Compute the original fairness discrepancy rate

    The value is `1 - (alpha * A + (1 - alpha) * B)`, where `A` (resp. `B`)
    is the largest absolute difference between any two elements of
    `A_tau` (resp. `B_tau`).

    Parameters
    ----------

      A_tau: list
        One value per group for the first error rate at a given decision
        threshold (`fairness_discrepancy_rate` passes the first output of
        `bob.measure.farfrr` here)

      B_tau: list
        One value per group for the second error rate at the same decision
        threshold (`fairness_discrepancy_rate` passes the second output of
        `bob.measure.farfrr` here)

      alpha: float
        Weight given to the `A_tau` discrepancy; `1 - alpha` is given to
        the `B_tau` one

    Returns
    -------

      fdr: float
        The fairness discrepancy rate

    Raises
    ------

      ValueError
        If `A_tau` or `B_tau` is empty
    """

    # The largest pairwise absolute difference of a set of numbers is its
    # range, so there is no need to enumerate all the pairs.
    max_A_tau = max(A_tau) - min(A_tau)
    max_B_tau = max(B_tau) - min(B_tau)

    return 1 - (alpha * max_A_tau + (1 - alpha) * max_B_tau)
def fairness_discrepancy_rate(
    negatives,
    positives,
    variable_suffix,
    fmr_thresholds,
    fdr_fn=compute_fdr,
    alpha=0.5,
    beta=0,
    taus=None,
):
    """
    Compute the Fairness Discrepancy Rate (FDR) for every decision threshold.

    Reference:
    de Freitas Pereira, Tiago, and Sébastien Marcel. "Fairness in Biometrics: a figure of merit to assess biometric verification systems." arXiv e-prints (2020): arXiv-2011.

    Parameters
    ----------

      negatives: dataframe
        Pandas Dataframe with the negative (impostor / non-mated) scores

      positives: dataframe
        Pandas Dataframe with the positive (genuine / mated) scores

      variable_suffix: str
        Suffix forwarded to `split_scores_by_variable`; it selects the
        `bio_ref_[variable_suffix]` / `probe_[variable_suffix]` variable
        used to split the scores

      fmr_thresholds: list
        FMR operational points; only used when `taus` is `None`

      fdr_fn:
        Callable invoked as `fdr_fn(A_tau, B_tau, alpha=alpha)` for every
        decision threshold

      alpha: float
        FDR hyperparameter `alpha`, forwarded to `fdr_fn`

      beta: float
        FDR hyperparameter `beta` (the implementation of NIST admits such
        parameter). It is accepted here but not forwarded to `fdr_fn`

      taus: list
        Decision thresholds. If `None`, they are computed from `negatives`
        with `compute_fmr_thresholds` and `fmr_thresholds`

    Returns
    -------

      fdrs: list
        One `fdr_fn` output per decision threshold, in the order of `taus`
    """

    # Decision thresholds are estimated on the pooled negatives, i.e.
    # independently of the demographics
    if taus is None:
        taus = compute_fmr_thresholds(negatives, fmr_thresholds)

    # One score table per demographic group
    negatives_as_dict, positives_as_dict = split_scores_by_variable(
        negatives, positives, variable_suffix
    )

    def _discrepancy_at(threshold):
        # Error rates of every group, considering ONLY the comparisons
        # inside the same demographic group
        rates = [
            bob.measure.farfrr(
                negatives_as_dict[group]["score"].to_numpy(),
                positives_as_dict[group]["score"].to_numpy(),
                threshold,
            )
            for group in positives_as_dict
        ]
        first_rates = [first for first, _ in rates]
        second_rates = [second for _, second in rates]
        return fdr_fn(first_rates, second_rates, alpha=alpha)

    return [_discrepancy_at(threshold) for threshold in taus]
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment