Commit 0f494b10 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Added new experiments

parent ab52cb5a
Pipeline #52235 failed with stages
in 12 minutes and 4 seconds
......@@ -2,37 +2,9 @@
Loading functions for some databases
"""
from bob.bio.base.score.load import get_dataframe
from bob.bio.base.score.load import get_split_dataframe
def load_meds(filename):
    """Read the score file ``filename`` with ``get_dataframe``.

    The result of ``get_dataframe`` is handed back untouched; callers in
    this package unpack it as a ``(negatives, positives)`` pair.
    """
    scores = get_dataframe(filename)
    return scores
def load(filename):
    """Read the score file ``filename`` with ``get_split_dataframe``.

    The result of ``get_split_dataframe`` is handed back untouched; callers
    in this package unpack it as a ``(negatives, positives)`` pair.
    """
    scores = get_split_dataframe(filename)
    return scores
def load_mobio(filename):
    """Read the score file ``filename`` with ``get_dataframe``.

    No filtering is applied: the value produced by ``get_dataframe`` is
    returned as is.
    """
    scores = get_dataframe(filename)
    return scores
def _filter_morph_scores(scores, possible_races, genders_considered):
    """Keep the rows whose reference AND probe both pass the demographic filters."""
    keep = (
        (scores.bio_ref_age_phd > 20)
        & (scores.probe_age_phd > 20)
        & (scores.bio_ref_sex.isin(genders_considered))
        & (scores.probe_sex.isin(genders_considered))
        & (scores.bio_ref_rac.isin(possible_races))
        & (scores.probe_rac.isin(possible_races))
    )
    return scores[keep]


def load_morph(filename, possible_races=["A", "B", "H", "W"], genders_considered=["M"]):
    """Load a score file and keep only the selected demographic groups.

    The scores are read with ``get_dataframe`` and both resulting tables are
    restricted to comparisons where the reference and the probe each have
    ``age_phd`` greater than 20, a ``sex`` in ``genders_considered`` and a
    ``rac`` in ``possible_races``.

    Parameters
    ----------
    filename:
        Score file, forwarded to ``get_dataframe``.
    possible_races:
        Values accepted in the ``bio_ref_rac`` / ``probe_rac`` columns.
    genders_considered:
        Values accepted in the ``bio_ref_sex`` / ``probe_sex`` columns.

    Returns
    -------
    tuple
        ``(negatives, positives)`` after filtering.
    """
    # The list defaults are only read (handed to ``isin``), never mutated,
    # so sharing them between calls is harmless.

    # Read the file named by the caller (previously the function object
    # ``get_dataframe`` itself was passed here by mistake).
    negatives, positives = get_dataframe(filename)

    negatives = _filter_morph_scores(negatives, possible_races, genders_considered)
    positives = _filter_morph_scores(positives, possible_races, genders_considered)

    return negatives, positives
......@@ -90,33 +90,36 @@ def plot_demographic_boxplot(
"""
scores = dict()
for n in negatives_as_dict:
# Filtering out by percentile
if percentile is not None:
negatives = negatives_as_dict[n][
negatives_as_dict[n]["score"]
> negatives_as_dict[n]["score"].quantile(1 - percentile)
]
negatives = negatives["score"].to_numpy()
## If the search doesn't work, negatives=[]
try:
negatives = negatives["score"].compute().to_numpy()
except ValueError:
negatives = []
if n in positives_as_dict:
positives_as_dict[n]["score"].to_numpy()
positives_as_dict[n]["score"].compute().to_numpy()
positives = positives_as_dict[n][
positives_as_dict[n]["score"]
< positives_as_dict[n]["score"].quantile(percentile)
< positives_as_dict[n]["score"].compute().quantile(percentile)
]
positives = positives["score"].to_numpy()
positives = positives["score"].compute().to_numpy()
else:
positives = []
# Running with the whole data
else:
negatives = negatives_as_dict[n]["score"].to_numpy()
negatives = negatives_as_dict[n]["score"].compute().to_numpy()
positives = (
positives_as_dict[n]["score"].to_numpy()
positives_as_dict[n]["score"].compute().to_numpy()
if n in positives_as_dict
else []
)
......@@ -335,8 +338,8 @@ def plot_fmr_fnmr_tradeoff(
fnmrs[key] = []
for t in taus:
fmr, fnmr = bob.measure.farfrr(
negatives_as_dict[key]["score"].to_numpy(),
positives_as_dict[key]["score"].to_numpy(),
negatives_as_dict[key]["score"].compute().to_numpy(),
positives_as_dict[key]["score"].compute().to_numpy(),
t,
)
fmrs[key].append(fmr)
......
......@@ -2,14 +2,14 @@
Standard report API
"""
from .io import load_meds, load_mobio, load_morph
from .io import load
from .plot import plot_demographic_boxplot, plot_fmr_fnmr_tradeoff, plot_fdr
from . import compute_fmr_thresholds
from matplotlib.backends.backend_pdf import PdfPages
def load_dev_eval_scores(scores_dev, scores_eval):
def load_dev_eval_scores(scores_dev, scores_eval, load_fn=load):
if scores_eval is None:
scores_eval = [None for _ in scores_dev]
......@@ -21,7 +21,7 @@ def load_dev_eval_scores(scores_dev, scores_eval):
for i, (d, e) in enumerate(zip(scores_dev, scores_eval)):
n_dev, p_dev = load_meds(d)
n_dev, p_dev = load_fn(d)
negatives_dev.append(n_dev)
positives_dev.append(p_dev)
......@@ -29,7 +29,7 @@ def load_dev_eval_scores(scores_dev, scores_eval):
negatives_eval.append(None)
positives_eval.append(None)
else:
n_eval, p_eval = load_meds(e)
n_eval, p_eval = load_fn(e)
negatives_eval.append(n_eval)
positives_eval.append(p_eval)
return negatives_dev, positives_dev, negatives_eval, positives_eval
......@@ -382,3 +382,60 @@ def casia_africa_report(
pdf.savefig(fig)
pdf.close()
def rfw_report(
    scores_dev,
    output_filename,
    fmr_thresholds=[10 ** i for i in list(range(-5, 0))],
    percentile=0.01,
    titles=None,
    possible_races=["Asian", "African", "Caucasian", "Indian"],
):
    """Write the race-based demographic report of one or more systems to a PDF.

    The PDF contains one FDR figure covering all systems, followed, for each
    system, by a score box-plot and an FMR/FNMR trade-off figure.

    Parameters
    ----------
    scores_dev:
        Score files, one per system, loaded with ``load_dev_eval_scores``
        (no evaluation set is used).
    output_filename:
        Path of the PDF file to write.
    fmr_thresholds:
        FMR operating points; defaults to ``10**-5`` ... ``10**-1``.
    percentile:
        Forwarded to ``plot_demographic_boxplot``.
    titles:
        Optional per-system titles, indexed in the same order as
        ``scores_dev``; ``None`` disables titles.
    possible_races:
        Accepted for interface compatibility; not used by this function.
    """
    variable_suffix = "race"

    # The context manager guarantees the PDF is closed even when loading or
    # plotting raises, instead of leaking the open file handle.
    with PdfPages(output_filename) as pdf:

        negatives_dev, positives_dev, _, _ = load_dev_eval_scores(scores_dev, None)

        ### FDR ###
        # There is no evaluation set here, so the decision thresholds are
        # computed on the same (dev) negatives they are later applied to.
        taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev]

        fig = plot_fdr(
            negatives_dev,
            positives_dev,
            titles,
            variable_suffix,
            fmr_thresholds,
            taus=taus,
        )
        pdf.savefig(fig)

        for i, (n_dev, p_dev) in enumerate(zip(negatives_dev, positives_dev)):

            title = None if titles is None else titles[i]

            fig = plot_demographic_boxplot(
                negatives_dev=n_dev,
                positives_dev=p_dev,
                variable_suffix=variable_suffix,
                percentile=percentile,
                fmr_thresholds=fmr_thresholds,
                title=title,
            )
            pdf.savefig(fig)

            #### PLOTTING THE FMR AND FNMR TRADE OFF
            fig = plot_fmr_fnmr_tradeoff(
                n_dev,
                p_dev,
                variable_suffix=variable_suffix,
                fmr_thresholds=fmr_thresholds,
                print_fmr_fnmr=True,
            )
            pdf.savefig(fig)
......@@ -27,7 +27,7 @@ def compute_fmr_thresholds(negatives, fmrs=[0.1, 0.01, 0.001]):
"""
negatives_as_np = negatives["score"].to_numpy().astype("float64")
negatives_as_np = negatives.compute()["score"].to_numpy().astype("float64")
taus = [bob.measure.far_threshold(negatives_as_np, [], far_value=t) for t in fmrs]
return taus
......@@ -188,8 +188,8 @@ def fairness_discrepancy_rate(
B_tau = []
for key in positives_as_dict:
a_tau, b_tau = bob.measure.farfrr(
negatives_as_dict[key]["score"].to_numpy(),
positives_as_dict[key]["score"].to_numpy(),
negatives_as_dict[key]["score"].compute().to_numpy(),
positives_as_dict[key]["score"].compute().to_numpy(),
t,
)
A_tau.append(a_tau)
......
......@@ -5,9 +5,13 @@ from bob.bio.demographics.reports import (
morph_report,
mobio_report,
casia_africa_report,
rfw_report,
)
import click
from bob.pipelines.distributed import VALID_DASK_CLIENT_STRINGS
from bob.extension.scripts.click_helper import ResourceOption
@click.command()
@common_options.scores_argument(nargs=-1)
......@@ -49,8 +53,17 @@ def meds(ctx, scores, evaluation, output, titles, percentile, **kargs):
help="Percentil of the data used to plot the score distribution box-plots",
default=0.01,
)
@click.option(
"--dask-client",
"-l",
entry_point_group="dask.client",
string_exceptions=VALID_DASK_CLIENT_STRINGS,
default="single-threaded",
help="Dask client for the execution of the pipeline.",
cls=ResourceOption,
)
@click.pass_context
def morph(ctx, scores, evaluation, output, titles, percentile, **kargs):
def morph(ctx, scores, evaluation, output, titles, percentile, dask_client, **kargs):
"""Standard demographics analysis using the MORPH dataset"""
scores = np.array(scores, dtype="object")
......@@ -64,6 +77,8 @@ def morph(ctx, scores, evaluation, output, titles, percentile, **kargs):
# Hardcoding the percentile of the score distribution
morph_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles)
dask_client.shutdown()
pass
......@@ -139,3 +154,22 @@ def casia_africa(
age_interval=age_interval,
target_ethnicity=target_demographic,
)
@click.command()
@common_options.scores_argument(nargs=-1)
@common_options.titles_option()
@common_options.output_plot_file_option(default_out="demographics_rfw.pdf")
@click.option(
    "--percentile",
    type=float,
    help="Percentil of the data used to plot the score distribution box-plots",
    default=0.5,
)
@click.pass_context
def rfw(ctx, scores, output, titles, percentile, **kargs):
    """Standard demographics analysis using the RFW dataset"""
    # The docstring above is what click prints as the command help, so it
    # must name the dataset this command actually reports on.
    # ``ctx`` and ``kargs`` are accepted from the decorators but not used.
    rfw_report(scores, output, percentile=percentile, titles=titles)
......@@ -4,113 +4,109 @@ import os
from bob.extension import rc
## PLEASE SET
# bob config set bob.bio.demographics.path [OUTPUT-PATH]
OUTPUT_PATH = (
"./results"
if rc.get("bob.bio.demographics.path") is None
else rc.get("bob.bio.demographics.path")
)
# In case the pipeline fails
# it tries again `max_attempts` times
max_attempts = 5
import click
# DATABASE
from bob.bio.face.database import CasiaAfricaDatabase
protocol = "ID-V-All-Ep1"
database = CasiaAfricaDatabase(protocol=protocol)
database_name = "casia-africa"
@click.command()
@click.option(
"--output-path",
default=rc.get("bob.bio.demographics.path"),
help=f'Output path. Default to: {rc.get("bob.bio.demographics.path")}',
)
@click.option("--sge", is_flag=True)
def run(output_path, sge):
# bob config set bob.bio.demographics.path [OUTPUT-PATH]
output_path = "./results" if output_path is None else output_path
# Baselines
# In case the pipeline fails
# it tries again `max_attempts` times
max_attempts = 5
from bob.bio.face.embeddings.tensorflow import (
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
resnet50_msceleb_arcface_2021,
resnet50_msceleb_arcface_20210521,
)
# DATABASE
from bob.bio.face.database import CasiaAfricaDatabase
from bob.bio.face.embeddings.pytorch import (
iresnet100,
iresnet50,
)
protocol = "ID-V-All-Ep1"
database = CasiaAfricaDatabase(protocol=protocol)
database_name = "casia-africa"
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
# Baselines
from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
from bob.bio.face.embeddings.tensorflow import (
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
resnet50_msceleb_arcface_2021,
resnet50_msceleb_arcface_20210521,
)
baselines = [
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
resnet50_msceleb_arcface_2021,
resnet50_msceleb_arcface_20210521,
iresnet100,
iresnet50,
arcface_insightFace_lresnet100,
vgg16_oxford_baseline,
]
from bob.bio.face.embeddings.pytorch import (
iresnet100,
iresnet50,
)
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
## Running baselines
from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics
baselines = [
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
resnet50_msceleb_arcface_2021,
resnet50_msceleb_arcface_20210521,
iresnet100,
iresnet50,
arcface_insightFace_lresnet100,
vgg16_oxford_baseline,
]
for b in baselines:
## Running baselines
print(f"Running {b.__name__}....")
from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics
for i in range(max_attempts):
for b in baselines:
IDIAP = True
dask_client = None
if IDIAP:
from dask.distributed import Client
print(f"Running {b.__name__}....")
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
for i in range(max_attempts):
cluster = SGEMultipleQueuesCluster(min_jobs=1)
dask_client = Client(cluster)
dask_client = None
if sge:
from dask.distributed import Client
### Running the baseline
output_path = os.path.join(
OUTPUT_PATH, "no-processing", database_name, b.__name__
)
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
try:
execute_vanilla_biometrics(
b(
annotation_type=database.annotation_type,
fixed_positions=database.fixed_positions,
),
database,
dask_client,
groups=["dev"],
output=output_path,
write_metadata_scores=True,
checkpoint=True,
dask_partition_size=200,
dask_n_workers=80,
)
cluster = SGEMultipleQueuesCluster(min_jobs=1)
dask_client = Client(cluster)
execute_vanilla_biometrics(
b(
annotation_type=database.annotation_type,
fixed_positions=database.fixed_positions,
),
database,
dask_client,
groups=["eval"],
output=output_path,
write_metadata_scores=True,
checkpoint=True,
dask_partition_size=200,
dask_n_workers=80,
### Running the baseline
experiment_path = os.path.join(
output_path, "no-processing", database_name, b.__name__
)
except:
print(f"Failed on attempt {i}")
if dask_client is not None:
dask_client.shutdown()
try:
execute_vanilla_biometrics(
b(
annotation_type=database.annotation_type,
fixed_positions=database.fixed_positions,
),
database,
dask_client,
groups=["dev"],
output=experiment_path,
write_metadata_scores=True,
checkpoint=False,
dask_partition_size=100,
dask_n_workers=80,
)
except:
print(f"Failed on attempt {i}")
if dask_client is not None:
dask_client.shutdown()
print("Done!")
if __name__ == "__main__":
run()
......@@ -4,87 +4,97 @@ import os
from bob.extension import rc
## PLEASE SET
# bob config set bob.bio.demographics.path [OUTPUT-PATH]
OUTPUT_PATH = (
"./results"
if rc.get("bob.bio.demographics.path") is None
else rc.get("bob.bio.demographics.path")
import click
@click.command()
@click.option(
"--output-path",
default=rc.get("bob.bio.demographics.path"),
help=f'Output path. Default to: {rc.get("bob.bio.demographics.path")}',
)
@click.option("--sge", is_flag=True)
def run(output_path, sge):
# bob config set bob.bio.demographics.path [OUTPUT-PATH]
output_path = "./results" if output_path is None else output_path
# DATABASE
from bob.bio.face.database import MEDSDatabase
# DATABASE
from bob.bio.face.database import MEDSDatabase
protocol = "verification_fold1"
database = MEDSDatabase(protocol=protocol)
database_name = "meds"
protocol = "verification_fold1"
database = MEDSDatabase(protocol=protocol)
database_name = "meds"
# Baselines
# Baselines
from bob.bio.face.embeddings.tensorflow import (
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
resnet50_msceleb_arcface_2021,
resnet50_msceleb_arcface_20210521,
)
from bob.bio.face.embeddings.tensorflow import (
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
resnet50_msceleb_arcface_2021,
resnet50_msceleb_arcface_20210521,
)
from bob.bio.face.embeddings.pytorch import (
iresnet100,
iresnet50,
)
from bob.bio.face.embeddings.pytorch import (
iresnet100,
iresnet50,
)
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline
baselines = [
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
resnet50_msceleb_arcface_2021,
resnet50_msceleb_arcface_20210521,
iresnet100,
iresnet50,
arcface_insightFace_lresnet100,
vgg16_oxford_baseline,
]
## Running baselines
baselines = [
inception_resnet_v2_msceleb_centerloss_2018,
facenet_sanderberg_20170512_110547,
resnet50_msceleb_arcface_2021,
resnet50_msceleb_arcface_20210521,
iresnet100,
iresnet50,
arcface_insightFace_lresnet100,
vgg16_oxford_baseline,
]
from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics
for b in baselines:
## Running baselines
print(f"Running {b.__name__}....")
from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics
dask_client = None
if sge:
from dask.distributed import Client
for b in baselines:
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
print(f"Running {b.__name__}....")
cluster = SGEMultipleQueuesCluster(min_jobs=1)
dask_client = Client(cluster)
IDIAP = True
dask_client = None
if IDIAP:
from dask.distributed import Client
### Running the baseline
experiment_path = os.path.join(
output_path, "no-processing", database_name, b.__name__
)
execute_vanilla_biometrics(
b(
annotation_type=database.annotation_type,
fixed_positions=database.fixed_positions,
),
database,
dask_client,
groups=["dev", "eval"],
output=experiment_path,
write_metadata_scores=True,
checkpoint=False,
dask_partition_size=200,
dask_n_workers=20,
)
if dask_client is not None:
dask_client.shutdown()
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster