Commit 35a813df authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Update

parent 0f494b10
Pipeline #53235 failed with stages
in 8 minutes and 22 seconds
......@@ -90,6 +90,7 @@ def plot_demographic_boxplot(
"""
scores = dict()
for n in negatives_as_dict:
# for n in positives_as_dict:
# Filtering out by percentile
if percentile is not None:
negatives = negatives_as_dict[n][
......@@ -128,7 +129,6 @@ def plot_demographic_boxplot(
return scores
def _plot(scores, axes, labels):
# This code raises a warning
# https://github.com/matplotlib/matplotlib/issues/16353
bp_negatives = axes.boxplot(
......@@ -153,6 +153,7 @@ def plot_demographic_boxplot(
# Matching the variable values to
# the actual labels for readability
# labels = list(positives_dev_as_dict.keys())
labels = list(negatives_dev_as_dict.keys())
if label_lookup_table is not None:
labels = [label_lookup_table[l] for l in labels]
......@@ -165,14 +166,14 @@ def plot_demographic_boxplot(
if percentile is None
else title + f" - boxplot at percentile = {percentile}"
)
fig.suptitle(title)
# fig.suptitle(title)
negatives_dev_as_dict
if has_eval():
axes = plt.subplot(2, 1, 1)
else:
axes = plt.subplot(1, 1, 1)
plt.title("development set")
# plt.title("development set")
def _compute_scores_and_plot(
negatives_as_dict, positives_as_dict, axes, plot_fmrs=True
......@@ -183,14 +184,18 @@ def plot_demographic_boxplot(
# Plotting the box plot
_plot(scores, axes, labels)
plt.grid(True)
plt.yticks(fontsize=18)
if plot_fmrs:
if taus is not None:
colors = list(plt.cm.get_cmap("tab20").colors)
# axes.axvline(
# t, linestyle="--", label=f"FMR {f} in the dev set", color=c
# )
[
axes.axvline(
t, linestyle="--", label=f"FMR {f} in the dev set", color=c
t, linestyle="--", label="$\\tau=FMR_{" + str(f) + "}$", color=c
)
for t, c, f in zip(taus, colors, fmr_thresholds)
]
......@@ -204,7 +209,7 @@ def plot_demographic_boxplot(
negatives_eval_as_dict, positives_eval_as_dict, axes, plot_fmrs=False
)
fig.legend()
fig.legend(loc=2, fontsize=16)
return fig
......@@ -333,36 +338,53 @@ def plot_fmr_fnmr_tradeoff(
fmrs = dict()
fnmrs = dict()
for key in positives_as_dict:
# for key in positives_as_dict:
for key in negatives_as_dict:
fmrs[key] = []
fnmrs[key] = []
if key in positives_as_dict:
fnmrs[key] = []
for t in taus:
fmr, fnmr = bob.measure.farfrr(
negatives_as_dict[key]["score"].compute().to_numpy(),
positives_as_dict[key]["score"].compute().to_numpy(),
t,
)
if key in positives_as_dict:
fmr, fnmr = bob.measure.farfrr(
negatives_as_dict[key]["score"].compute().to_numpy(),
positives_as_dict[key]["score"].compute().to_numpy(),
t,
)
fnmrs[key].append(fnmr)
else:
fmr, _ = bob.measure.farfrr(
negatives_as_dict[key]["score"].compute().to_numpy(), [0.0], t,
)
fmrs[key].append(fmr)
fnmrs[key].append(fnmr)
# Plotting the FMR and FNMR in two
# separated subplots
fig, ax = plt.subplots(figsize=(16, 8))
fig.suptitle(title)
labels = list(positives_as_dict.keys())
# LABELS FOR FNMR
labels_fnmr = list(positives_as_dict.keys())
if label_lookup_table is not None:
labels = [label_lookup_table[l] for l in labels]
labels_fnmr = [label_lookup_table[l] for l in labels_fnmr]
# Plot FNMR
axes = plt.subplot(2, 1, 1)
[plt.semilogx(fmr_thresholds, fnmrs[f], label=l) for f, l in zip(fnmrs, labels)]
[
plt.semilogx(fmr_thresholds, fnmrs[f], label=l)
for f, l in zip(fnmrs, labels_fnmr)
]
plt.ylabel("$FNMR(\\tau)$", fontsize=18)
plt.grid(True)
# LABELS FOR FMR
labels_fmr = list(negatives_as_dict.keys())
if label_lookup_table is not None:
labels_fmr = [label_lookup_table[l] for l in labels_fmr]
axes = plt.subplot(2, 1, 2)
[plt.semilogx(fmr_thresholds, fmrs[f], label=l) for f, l in zip(fmrs, labels)]
[plt.semilogx(fmr_thresholds, fmrs[f], label=l) for f, l in zip(fmrs, labels_fmr)]
plt.ylabel("$FMR(\\tau)$", fontsize=18)
plt.xlabel("$\\tau=FMR_{10^{-x}}$", fontsize=18)
......@@ -372,7 +394,7 @@ def plot_fmr_fnmr_tradeoff(
## Printing
if print_fmr_fnmr:
def print_table(header, fmrs, title):
def print_table(header, fmrs, title, labels):
from tabulate import tabulate
print(title)
......@@ -381,10 +403,10 @@ def plot_fmr_fnmr_tradeoff(
for key, l in zip(fmrs, labels)
]
print(tabulate([header] + content))
print(tabulate([header] + content, tablefmt="rst"))
header = ["Ethnicities"] + fmr_thresholds
print_table(header, fmrs, title=f"{title} - FMR")
print_table(header, fnmrs, title=f"{title} - FNMR")
print_table(header, fmrs, title=f"{title} - FMR", labels=labels_fmr)
print_table(header, fnmrs, title=f"{title} - FNMR", labels=labels_fnmr)
return fig
......@@ -134,13 +134,13 @@ def morph_report(
"A": "Asian",
"W": "White",
"B": "Black",
"H": "Hispanic",
"H": "Hisp.",
}
label_lookup_table = dict()
for a in list(variables.keys()):
for b in list(variables.keys()):
label_lookup_table[f"{a}__{b}"] = f"{variables[a]}__{variables[b]}"
label_lookup_table[f"{a}__{b}"] = f"{variables[a]}-{variables[b]}"
variable_suffix = "rac"
......@@ -178,16 +178,16 @@ def morph_report(
taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]
fig = plot_fdr(
negatives_dev,
positives_dev,
titles,
variable_suffix,
fmr_thresholds,
taus=taus,
)
# fig = plot_fdr(
# negatives_dev,
# positives_dev,
# titles,
# variable_suffix,
# fmr_thresholds,
# taus=taus,
# )
pdf.savefig(fig)
# pdf.savefig(fig)
for i, (n_dev, p_dev, n_eval, p_eval) in enumerate(
zip(negatives_dev, positives_dev, negatives_eval, positives_eval)
......@@ -209,17 +209,17 @@ def morph_report(
pdf.savefig(fig)
#### PLOTTING THE FMR AND FNMR TRADE OFF
fig = plot_fmr_fnmr_tradeoff(
n_dev,
p_dev,
variable_suffix=variable_suffix,
fmr_thresholds=fmr_thresholds,
negatives_eval=n_eval,
positives_eval=p_eval,
print_fmr_fnmr=True,
label_lookup_table=label_lookup_table,
)
pdf.savefig(fig)
# fig = plot_fmr_fnmr_tradeoff(
# n_dev,
# p_dev,
# variable_suffix=variable_suffix,
# fmr_thresholds=fmr_thresholds,
# negatives_eval=n_eval,
# positives_eval=p_eval,
# print_fmr_fnmr=True,
# label_lookup_table=label_lookup_table,
# )
# pdf.savefig(fig)
pdf.close()
......@@ -393,6 +393,8 @@ def rfw_report(
possible_races=["Asian", "African", "Caucasian", "Indian"],
):
possible_genders = ["male", "female"]
variable_suffix = "race"
pdf = PdfPages(output_filename)
......@@ -403,39 +405,58 @@ def rfw_report(
# Compute FDR on the same set if there's no evaluation set
taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]
fig = plot_fdr(
negatives_dev,
positives_dev,
titles,
variable_suffix,
fmr_thresholds,
taus=taus,
)
def filter_gender(df, gender):
    """Return the rows of `df` whose probe and bio-reference genders both equal `gender`.

    Parameters
    ----------
    df:
        Table exposing `probe_gender` and `bio_ref_gender` columns and
        supporting boolean selection through `.loc`.
    gender:
        Value that both gender columns must match for a row to be kept.

    Returns
    -------
    The subset of `df` in which both columns are equal to `gender`.
    """
    # Build the two column masks separately, then keep rows satisfying both.
    probe_matches = df.probe_gender == gender
    reference_matches = df.bio_ref_gender == gender
    return df.loc[probe_matches & reference_matches]
pdf.savefig(fig)
for gender in possible_genders:
for i, (n_dev, p_dev) in enumerate(zip(negatives_dev, positives_dev)):
filtered_negatives_dev = []
filtered_positives_dev = []
title = None if titles is None else titles[i]
for n, p in zip(negatives_dev, positives_dev):
filtered_negatives_dev.append(filter_gender(n, gender))
filtered_positives_dev.append(filter_gender(p, gender))
fig = plot_demographic_boxplot(
negatives_dev=n_dev,
positives_dev=p_dev,
variable_suffix=variable_suffix,
percentile=percentile,
fmr_thresholds=fmr_thresholds,
title=title,
fig = plot_fdr(
filtered_negatives_dev,
filtered_positives_dev,
titles,
variable_suffix,
fmr_thresholds,
taus=taus,
title=f"Fairness Discrepancy rate - {gender}",
)
pdf.savefig(fig)
#### PLOTTING THE FMR AND FNMR TRADE OFF
fig = plot_fmr_fnmr_tradeoff(
n_dev,
p_dev,
variable_suffix=variable_suffix,
fmr_thresholds=fmr_thresholds,
print_fmr_fnmr=True,
)
pdf.savefig(fig)
for i, (n_dev, p_dev) in enumerate(
zip(filtered_negatives_dev, filtered_positives_dev)
):
title = None if titles is None else f"titles[i] - {gender}"
fig = plot_demographic_boxplot(
negatives_dev=n_dev,
positives_dev=p_dev,
variable_suffix=variable_suffix,
percentile=percentile,
fmr_thresholds=fmr_thresholds,
title=title,
)
pdf.savefig(fig)
title = (
f"False Match and False non Match trade-off per demographic - {gender}"
)
#### PLOTTING THE FMR AND FNMR TRADE OFF
fig = plot_fmr_fnmr_tradeoff(
n_dev,
p_dev,
variable_suffix=variable_suffix,
fmr_thresholds=fmr_thresholds,
print_fmr_fnmr=True,
title=title,
)
pdf.savefig(fig)
pdf.close()
......@@ -75,7 +75,14 @@ def morph(ctx, scores, evaluation, output, titles, percentile, dask_client, **ka
scores_eval = None
# Hardcoding the percentile of the score distribution
morph_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles)
morph_report(
scores_dev,
output,
scores_eval,
percentile=percentile,
titles=titles,
fmr_thresholds=[10 ** i for i in list(range(-6, -2))],
)
dask_client.shutdown()
......
......@@ -2,7 +2,7 @@
Base tests
"""
import pandas as pd
from bob.bio.base.score.load import get_dataframe
from bob.bio.base.score.load import get_split_dataframe
from bob.bio.demographics import compute_fmr_thresholds, split_scores_by_variable
import pkg_resources
import numpy as np
......@@ -14,7 +14,7 @@ def test_fmr_thresholds():
"bob.bio.demographics.test", "data/test_scores.csv"
)
negatives, positives = get_dataframe(score_file)
negatives, positives = get_split_dataframe(score_file)
taus = compute_fmr_thresholds(negatives, fmrs=[0.1, 0.01, 0.001])
......@@ -29,7 +29,7 @@ def test_split_scores_by_variable():
"bob.bio.demographics.test", "data/test_scores.csv"
)
negatives, positives = get_dataframe(score_file)
negatives, positives = get_split_dataframe(score_file)
variable_suffix = "rac" # Rac is `W` or `B` in this dataset
negatives_as_dict, positives_as_dict = split_scores_by_variable(
......
......@@ -32,6 +32,7 @@ requirements:
- setuptools
- bob.measure
- bob.bio.base
- bob.bio.face
# place other runtime dependencies here (same as requirements.txt)
test:
......
.. -*- coding: utf-8 -*-
.. _bob.bio.demographics.fdr:
==========================
Fairness Discrepancy Rate
==========================
Here we discuss the fairness discrepancy rate (FDR) proposed in::
@article{de2020fairness,
title={Fairness in Biometrics: a figure of merit to assess biometric verification systems},
author={de Freitas Pereira, Tiago and Marcel, S{\'e}bastien},
journal={arXiv preprint arXiv:2011.02395},
year={2020}
}
In this work, a biometric verification system is considered fair if statistical parity between groups is reached in terms of both `FMR` (False Match Rate) and
`FNMR` (False Non Match Rate) for a given decision threshold :math:`\tau`.
More formally, given a set of demographic groups :math:`\mathcal{D}=\{d_1,d_2,...,d_n\}`, and :math:`\tau = \text{FMR}_{x}`, a biometric verification system is considered
fair with respect to `FMR` if the following premise holds:
.. math::
\text{FMR}^{d_i}(\tau) \geq \text{FMR}^{d_j}(\tau) - \epsilon \text{ } \forall d_i,d_j \in \mathcal{D}
This premise can be written as the following equation:
.. math::
A(\tau) = \max(|\text{FMR}^{d_i}(\tau)- \text{FMR}^{d_j}(\tau)|) \leq \epsilon \text{ } \forall d_i, d_j \in \mathcal{D}
Similarly, in terms of :math:`\text{FNMR}`, a biometric verification system is considered fair if the following premise holds:
.. math::
\text{FNMR}^{d_i}(\tau) \geq \text{FNMR}^{d_j}(\tau) - \epsilon \text{ } \forall d_i,d_j \in \mathcal{D}.
This premise can be written as the following equation:
.. math::
B(\tau) = \max(|\text{FNMR}^{d_i}(\tau)- \text{FNMR}^{d_j}(\tau)|) \leq \epsilon \text{ } \forall d_i, d_j \in \mathcal{D}.
Since `A` and `B` are functions of :math:`\tau`, both can be summarized in a single figure of merit, which we refer to as the Fairness Discrepancy Rate (FDR) and define as:
.. math::
FDR(\tau) = 1- (\alpha A(\tau) + (1-\alpha) B(\tau)),
where :math:`\alpha` is a hyper-parameter that defines the weight of :math:`A(\tau)` in the figure of merit (the importance of False Matches).
To see how FDR behaves in situations of fair/unfair score distributions, please check this `link <https://mybinder.org/v2/gh/tiagofrepereira2012/fdr/master?filepath=1.fair_unfair_example.ipynb>`_.
......@@ -2,13 +2,18 @@
.. _bob.bio.demographics:
======================
bob.bio.demographics
======================
=======================
Demographics analysis
=======================
.. todo ::
Write here a small (1 paragraph) introduction explaining this project. See
other projects for examples.
This package contains an API to analyse demographic differentials in biometric scores generated with :ref:`bob.bio.base <bob.bio.base>`.
Furthermore, it provides customized reports for the following databases:
- MEDS II
- MORPH
- MOBIO
- RFW
- CASIA-AFRICA
......@@ -17,9 +22,11 @@ Users Guide
.. toctree::
:maxdepth: 2
setup
fairness_discrepancy_rate
plots/index
references
api
.. todolist::
.. include:: links.rst
\ No newline at end of file
.. -*- coding: utf-8 -*-
.. _bob.bio.demographics.plots:
===================
Analysis and plots
===================
This section will show some plots with SOTA face recognition systems and their assessment with respect to demographic differentials.
The analysis on some datasets can be found in::
@article{de2020fairness,
title={Fairness in Biometrics: a figure of merit to assess biometric verification systems},
author={de Freitas Pereira, Tiago and Marcel, S{\'e}bastien},
journal={arXiv preprint arXiv:2011.02395},
year={2020}
}
.. toctree::
meds/meds
.. -*- coding: utf-8 -*-
.. _bob.bio.demographics.meds.analysis:
========
MEDS II
========
Below are the demographic differentials of some face recognition (FR) baselines, assessed using the :ref:`Fairness Discrepancy Rate <bob.bio.demographics.fdr>`.
.. figure:: meds/meds.png
:align: center
:alt: Fairness Discrepancy Rate
FDR and FMR/FNMR trade-offs (:download:`pdf <meds/meds.pdf>`)
Facenet-sanderberg
------------------
`Facenet <https://github.com/davidsandberg/facenet>`_ model.
FMR
===
=========== ===== ===== ====== ===== ===== =====
Ethnicities 1e-06 1e-05 0.0001 0.001 0.01 0.1
White-Black 0 0 0 0 0 0.019
White-White 0 0 0 0 0.004 0.067
Black-Black 0 0 0 0.002 0.032 0.289
Black-White 0 0 0 0 0 0.017
=========== ===== ===== ====== ===== ===== =====
FNMR
====
=========== ===== ===== ====== ===== ===== ===
Ethnicities 1e-06 1e-05 0.0001 0.001 0.01 0.1
White-White 0.131 0.131 0.119 0.06 0.012 0
Black-Black 0.08 0.08 0.08 0.013 0 0
=========== ===== ===== ====== ===== ===== ===
iResnet 100
-----------
`Arcface (iResnet 100) model <https://github.com/deepinsight/insightface>`_ ported to pytorch.
FMR
===
=========== ===== ===== ====== ===== ===== =====
Ethnicities 1e-06 1e-05 0.0001 0.001 0.01 0.1
White-Black 0 0 0 0 0.003 0.065
White-White 0 0 0 0 0.01 0.117
Black-Black 0 0 0 0.002 0.019 0.144
Black-White 0 0 0 0 0.003 0.071
=========== ===== ===== ====== ===== ===== =====
FNMR
====
=========== ===== ===== ====== ===== ==== ===
Ethnicities 1e-06 1e-05 0.0001 0.001 0.01 0.1
White-White 0.012 0.012 0 0 0 0
Black-Black 0.004 0.004 0 0 0 0
=========== ===== ===== ====== ===== ==== ===
iResnet 50
----------
`Arcface (iResnet 50) model <https://github.com/deepinsight/insightface>`_ ported to pytorch.
FMR
===
=========== ===== ===== ====== ===== ===== =====
Ethnicities 1e-06 1e-05 0.0001 0.001 0.01 0.1
White-Black 0 0 0 0 0.002 0.051
White-White 0 0 0 0.001 0.008 0.109
Black-Black 0 0 0 0.002 0.022 0.161
Black-White 0 0 0 0 0.002 0.056
=========== ===== ===== ====== ===== ===== =====
FNMR
====
=========== ===== ===== ====== ===== ===== =====
Ethnicities 1e-06 1e-05 0.0001 0.001 0.01 0.1
White-White 0.071 0.071 0.06 0.06 0.036 0.024
Black-Black 0.093 0.093 0.089 0.04 0.009 0
=========== ===== ===== ====== ===== ===== =====
idiap_resnet50_arcface_2021
---------------------------
Arcface (resnet 50 **not the iResnet**) model trained using tensorflow.
FMR
===
=========== ===== ===== ====== ===== ===== =====
Ethnicities 1e-06 1e-05 0.0001 0.001 0.01 0.1
White-Black 0 0 0 0 0.003 0.047
White-White 0 0 0 0 0.011 0.124
Black-Black 0 0 0.001 0.003 0.022 0.153