From ab52cb5a1cfa2d1d2f3b984a373149d30e15677b Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Fri, 25 Jun 2021 12:50:33 +0200 Subject: [PATCH] Updated some reports --- bob/bio/demographics/io.py | 38 ++ bob/bio/demographics/plot.py | 42 ++- bob/bio/demographics/reports.py | 384 ++++++++++++++++++++ bob/bio/demographics/script/__init__.py | 0 bob/bio/demographics/script/commands.py | 141 +++++++ bob/bio/demographics/script/demographics.py | 13 + experiments/no-processing/casia-africa.py | 116 ++++++ experiments/no-processing/meds.py | 90 +++++ experiments/no-processing/mobio.py | 95 +++++ experiments/no-processing/morph.py | 125 +++++++ experiments/score-norm/meds.py | 93 +++++ setup.py | 71 ++-- 12 files changed, 1166 insertions(+), 42 deletions(-) create mode 100644 bob/bio/demographics/io.py create mode 100644 bob/bio/demographics/reports.py create mode 100644 bob/bio/demographics/script/__init__.py create mode 100644 bob/bio/demographics/script/commands.py create mode 100644 bob/bio/demographics/script/demographics.py create mode 100644 experiments/no-processing/casia-africa.py create mode 100644 experiments/no-processing/meds.py create mode 100644 experiments/no-processing/mobio.py create mode 100644 experiments/no-processing/morph.py create mode 100644 experiments/score-norm/meds.py diff --git a/bob/bio/demographics/io.py b/bob/bio/demographics/io.py new file mode 100644 index 0000000..d97eaf0 --- /dev/null +++ b/bob/bio/demographics/io.py @@ -0,0 +1,38 @@ +""" +Loading functions for some databases +""" + +from bob.bio.base.score.load import get_dataframe + + +def load_meds(filename): + return get_dataframe(filename) + + +def load_mobio(filename): + return get_dataframe(filename) + + +def load_morph(filename, possible_races=["A", "B", "H", "W"], genders_considered=["M"]): + + negatives, positives = get_dataframe(get_dataframe) + + negatives = negatives[ + (negatives.bio_ref_age_phd > 20) + & (negatives.probe_age_phd > 20) + & (negatives.bio_ref_sex.isin(genders_considered)) + & (negatives.probe_sex.isin(genders_considered)) + & (negatives.bio_ref_rac.isin(possible_races)) + & (negatives.probe_rac.isin(possible_races)) + ] + + positives = positives[ + (positives.bio_ref_age_phd > 20) + & (positives.probe_age_phd > 20) + & (positives.bio_ref_sex.isin(genders_considered)) + & (positives.probe_sex.isin(genders_considered)) + & (positives.bio_ref_rac.isin(possible_races)) + & (positives.probe_rac.isin(possible_races)) + ] + + return negatices, positives diff --git a/bob/bio/demographics/plot.py b/bob/bio/demographics/plot.py index 14c5bb8..2dc762b 100644 --- a/bob/bio/demographics/plot.py +++ b/bob/bio/demographics/plot.py @@ -125,6 +125,9 @@ def plot_demographic_boxplot( return scores def _plot(scores, axes, labels): + + # This code raises a warning + # https://github.com/matplotlib/matplotlib/issues/16353 bp_negatives = axes.boxplot( [scores[s][0] for s in scores], patch_artist=True, @@ -154,7 +157,11 @@ def plot_demographic_boxplot( # Plotting the boxplots fig, ax = plt.subplots(figsize=(16, 8)) - title = title if percentile is None else title + f" percentile = {percentile}" + title = ( + title + if percentile is None + else title + f" - boxplot at percentile = {percentile}" + ) fig.suptitle(title) negatives_dev_as_dict @@ -162,14 +169,17 @@ def plot_demographic_boxplot( axes = plt.subplot(2, 1, 1) else: axes = plt.subplot(1, 1, 1) + plt.title("development set") def _compute_scores_and_plot( negatives_as_dict, positives_as_dict, axes, plot_fmrs=True ): + # Computing the scores scores = _get_scores(negatives_as_dict, positives_as_dict) # Plotting the box plot _plot(scores, axes, labels) + plt.grid(True) if plot_fmrs: if taus is not None: @@ -185,8 +195,8 @@ def plot_demographic_boxplot( _compute_scores_and_plot(negatives_dev_as_dict, positives_dev_as_dict, axes) if has_eval(): - axes = plt.subplot(2, 1, 2) - + axes = plt.subplot(2, 1, 2, sharex=axes) + plt.title("evaluation set") _compute_scores_and_plot( negatives_eval_as_dict, positives_eval_as_dict, axes, plot_fmrs=False ) @@ -254,6 +264,9 @@ def plot_fdr( assert len(positives) == len(negatives) == len(labels) title = "Fairness Discrepancy Rate" if title is None else title + if taus is None: + taus = [compute_fmr_thresholds(n, fmr_thresholds) for n in negatives] + fdrs = [ fairness_discrepancy_rate( neg, @@ -293,6 +306,7 @@ def plot_fmr_fnmr_tradeoff( positives_eval=None, label_lookup_table=None, title="False Match and False non Match trade-off per demographic", + print_fmr_fnmr=False, ): def has_eval(): return negatives_eval is not None and positives_eval is not None @@ -328,9 +342,8 @@ def plot_fmr_fnmr_tradeoff( fmrs[key].append(fmr) fnmrs[key].append(fnmr) - # Plottint the FMR and FNMR in two + # Plotting the FMR and FNMR in two # separated subplots - fig, ax = plt.subplots(figsize=(16, 8)) fig.suptitle(title) @@ -351,7 +364,24 @@ def plot_fmr_fnmr_tradeoff( plt.xlabel("$\\tau=FMR_{10^{-x}}$", fontsize=18) plt.grid(True) - plt.legend() + ## Printing + if print_fmr_fnmr: + + def print_table(header, fmrs, title): + from tabulate import tabulate + + print(title) + content = [ + [l] + [round(value, 3) for value in fmrs[key]] + for key, l in zip(fmrs, labels) + ] + + print(tabulate([header] + content)) + + header = ["Ethnicities"] + fmr_thresholds + print_table(header, fmrs, title=f"{title} - FMR") + print_table(header, fnmrs, title=f"{title} - FNMR") + return fig diff --git a/bob/bio/demographics/reports.py b/bob/bio/demographics/reports.py new file mode 100644 index 0000000..ec2285a --- /dev/null +++ b/bob/bio/demographics/reports.py @@ -0,0 +1,384 @@ +""" +Standar report API +""" + +from .io import load_meds, load_mobio, load_morph +from .plot import plot_demographic_boxplot, plot_fmr_fnmr_tradeoff, plot_fdr +from . import compute_fmr_thresholds + +from matplotlib.backends.backend_pdf import PdfPages + + +def load_dev_eval_scores(scores_dev, scores_eval): + if scores_eval is None: + scores_eval = [None for _ in scores_dev] + + negatives_dev = [] + positives_dev = [] + + negatives_eval = [] + positives_eval = [] + + for i, (d, e) in enumerate(zip(scores_dev, scores_eval)): + + n_dev, p_dev = load_meds(d) + negatives_dev.append(n_dev) + positives_dev.append(p_dev) + + if e is None: + negatives_eval.append(None) + positives_eval.append(None) + else: + n_eval, p_eval = load_meds(e) + negatives_eval.append(n_eval) + positives_eval.append(p_eval) + return negatives_dev, positives_dev, negatives_eval, positives_eval + + +def meds_report( + scores_dev, + output_filename, + scores_eval=None, + fmr_thresholds=[10 ** i for i in list(range(-6, 0))], + percentile=0.01, + titles=None, +): + + label_lookup_table = { + "W__W": "White-White", + "W__B": "White-Black", + "B__W": "Black-White", + "B__B": "Black-Black", + } + + variable_suffix = "rac" + + pdf = PdfPages(output_filename) + + negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores( + scores_dev, scores_eval + ) + + if negatives_eval[0] is None: + + # Compute FDR on the same set if there's no evaluation set + fig = plot_fdr( + negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds + ) + else: + # If there is evaluation set + # compute the decision thresholds + + taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev] + + fig = plot_fdr( + negatives_dev, + positives_dev, + titles, + variable_suffix, + fmr_thresholds, + taus=taus, + ) + + pdf.savefig(fig) + + for i, (n_dev, p_dev, n_eval, p_eval) in enumerate( + zip(negatives_dev, positives_dev, negatives_eval, positives_eval) + ): + + title = None if titles is None else titles[i] + + fig = plot_demographic_boxplot( + negatives_dev=n_dev, + positives_dev=p_dev, + variable_suffix=variable_suffix, + negatives_eval=n_eval, + positives_eval=p_eval, + label_lookup_table=label_lookup_table, + percentile=percentile, + fmr_thresholds=fmr_thresholds, + title=title, + ) + pdf.savefig(fig) + + #### PLOTTING THE FMR AND FNMR TRADE OFF + fig = plot_fmr_fnmr_tradeoff( + n_dev, + p_dev, + variable_suffix=variable_suffix, + fmr_thresholds=fmr_thresholds, + negatives_eval=n_eval, + positives_eval=p_eval, + label_lookup_table=label_lookup_table, + print_fmr_fnmr=True, + title=title, + ) + pdf.savefig(fig) + + pdf.close() + + +def morph_report( + scores_dev, + output_filename, + scores_eval=None, + fmr_thresholds=[10 ** i for i in list(range(-8, 0))], + percentile=0.01, + titles=None, + possible_races=["A", "B", "H", "W"], + genders_considered=["M"], +): + + variables = { + "O": "Other", + "A": "Asian", + "W": "White", + "B": "Black", + "H": "Hispanic", + } + + label_lookup_table = dict() + for a in list(variables.keys()): + for b in list(variables.keys()): + label_lookup_table[f"{a}__{b}"] = f"{variables[a]}__{variables[b]}" + + variable_suffix = "rac" + + pdf = PdfPages(output_filename) + + negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores( + scores_dev, scores_eval + ) + + def filter_out(dataframe): + return dataframe[ + (dataframe.bio_ref_age_phd > 20) + & (dataframe.probe_age_phd > 20) + & (dataframe.bio_ref_sex.isin(genders_considered)) + & (dataframe.probe_sex.isin(genders_considered)) + & (dataframe.bio_ref_rac.isin(possible_races)) + & (dataframe.probe_rac.isin(possible_races)) + ] + + negatives_dev = [filter_out(n) for n in negatives_dev] + positives_dev = [filter_out(n) for n in positives_dev] + + if negatives_eval[0] is None: + + # Compute FDR on the same set if there's no evaluation set + fig = plot_fdr( + negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds + ) + else: + # If there is evaluation set + # compute the decision thresholds + + negatives_eval = [filter_out(n) for n in negatives_eval] + positives_eval = [filter_out(n) for n in positives_eval] + + taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev] + + fig = plot_fdr( + negatives_dev, + positives_dev, + titles, + variable_suffix, + fmr_thresholds, + taus=taus, + ) + + pdf.savefig(fig) + + for i, (n_dev, p_dev, n_eval, p_eval) in enumerate( + zip(negatives_dev, positives_dev, negatives_eval, positives_eval) + ): + + title = None if titles is None else titles[i] + + fig = plot_demographic_boxplot( + negatives_dev=n_dev, + positives_dev=p_dev, + variable_suffix=variable_suffix, + negatives_eval=n_eval, + positives_eval=p_eval, + label_lookup_table=label_lookup_table, + percentile=percentile, + fmr_thresholds=fmr_thresholds, + title=title, + ) + pdf.savefig(fig) + + #### PLOTTING THE FMR AND FNMR TRADE OFF + fig = plot_fmr_fnmr_tradeoff( + n_dev, + p_dev, + variable_suffix=variable_suffix, + fmr_thresholds=fmr_thresholds, + negatives_eval=n_eval, + positives_eval=p_eval, + print_fmr_fnmr=True, + label_lookup_table=label_lookup_table, + ) + pdf.savefig(fig) + + pdf.close() + + +def mobio_report( + scores_dev, + output_filename, + scores_eval=None, + fmr_thresholds=[10 ** i for i in list(range(-6, 0))], + percentile=0.1, + titles=None, +): + + label_lookup_table = { + "m__m": "Male-Male", + "m__f": "Male-Female", + "f__m": "Female-Male", + "f__f": "Female-Female", + } + + variable_suffix = "gender" + + pdf = PdfPages(output_filename) + + negatives_dev, positives_dev, negatives_eval, positives_eval = load_dev_eval_scores( + scores_dev, scores_eval + ) + + if negatives_eval[0] is None: + + # Compute FDR on the same set if there's no evaluation set + fig = plot_fdr( + negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds + ) + else: + # If there is evaluation set + # compute the decision thresholds + + taus = [compute_fmr_thresholds(d, fmr_thresholds) for d in negatives_dev] + + fig = plot_fdr( + negatives_dev, + positives_dev, + titles, + variable_suffix, + fmr_thresholds, + taus=taus, + ) + + pdf.savefig(fig) + + for i, (n_dev, p_dev, n_eval, p_eval) in enumerate( + zip(negatives_dev, positives_dev, negatives_eval, positives_eval) + ): + + title = None if titles is None else titles[i] + + fig = plot_demographic_boxplot( + negatives_dev=n_dev, + positives_dev=p_dev, + variable_suffix=variable_suffix, + negatives_eval=n_eval, + positives_eval=p_eval, + label_lookup_table=label_lookup_table, + percentile=percentile, + fmr_thresholds=fmr_thresholds, + title=title, + ) + pdf.savefig(fig) + + #### PLOTTING THE FMR AND FNMR TRADE OFF + fig = plot_fmr_fnmr_tradeoff( + n_dev, + p_dev, + variable_suffix=variable_suffix, + fmr_thresholds=fmr_thresholds, + negatives_eval=n_eval, + positives_eval=p_eval, + label_lookup_table=label_lookup_table, + print_fmr_fnmr=True, + title=title, + ) + pdf.savefig(fig) + + pdf.close() + + +def casia_africa_report( + scores_dev, + output_filename, + fmr_thresholds=[10 ** i for i in list(range(-8, 0))], + percentile=0.1, + titles=None, + age_interval=[0, 1000], + target_ethnicity=None, +): + variable_suffix = "gender" + + pdf = PdfPages(output_filename) + + negatives_dev, positives_dev, _, _ = load_dev_eval_scores(scores_dev, None) + + def filter_out(dataframe, age_interval, target_ethnicity): + dataframe = dataframe[ + dataframe.probe_age.between(age_interval[0], age_interval[1]) + & dataframe.bio_ref_age.between(age_interval[0], age_interval[1]) + ] + + if target_ethnicity is None: + return dataframe + + dataframe = dataframe[ + dataframe.bio_ref_ethnicity + == target_ethnicity & dataframe.probe_ethnicity + == target_ethnicity + ] + return dataframe + + negatives_dev = [ + filter_out(n, age_interval, target_ethnicity) for n in negatives_dev + ] + positives_dev = [ + filter_out(p, age_interval, target_ethnicity) for p in positives_dev + ] + + # Compute FDR on the same set if there's no evaluation set + fig = plot_fdr( + negatives_dev, positives_dev, titles, variable_suffix, fmr_thresholds + ) + + pdf.savefig(fig) + + for i, (n_dev, p_dev) in enumerate(zip(negatives_dev, positives_dev)): + + title = None if titles is None else titles[i] + + fig = plot_demographic_boxplot( + negatives_dev=n_dev, + positives_dev=p_dev, + variable_suffix=variable_suffix, + negatives_eval=None, + positives_eval=None, + percentile=percentile, + fmr_thresholds=fmr_thresholds, + title=title, + ) + pdf.savefig(fig) + + #### PLOTTING THE FMR AND FNMR TRADE OFF + fig = plot_fmr_fnmr_tradeoff( + n_dev, + p_dev, + variable_suffix=variable_suffix, + fmr_thresholds=fmr_thresholds, + negatives_eval=None, + positives_eval=None, + print_fmr_fnmr=True, + title=title, + ) + pdf.savefig(fig) + + pdf.close() diff --git a/bob/bio/demographics/script/__init__.py b/bob/bio/demographics/script/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bob/bio/demographics/script/commands.py b/bob/bio/demographics/script/commands.py new file mode 100644 index 0000000..1aa4191 --- /dev/null +++ b/bob/bio/demographics/script/commands.py @@ -0,0 +1,141 @@ +from bob.measure.script import common_options +import numpy as np +from bob.bio.demographics.reports import ( + meds_report, + morph_report, + mobio_report, + casia_africa_report, +) +import click + + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.titles_option() +@common_options.eval_option() +@common_options.output_plot_file_option(default_out="demographics_meds.pdf") +@click.option( + "--percentile", + type=float, + help="Percentil of the data used to plot the score distribution box-plots", + default=0.1, +) +@click.pass_context +def meds(ctx, scores, evaluation, output, titles, percentile, **kargs): + """Standard demographics analysis using the MEDS dataset""" + + scores = np.array(scores, dtype="object") + + if evaluation: + scores_dev = scores[[i for i in list(range(len(scores))) if i % 2 == 0]] + scores_eval = scores[[i for i in list(range(len(scores))) if i % 2 != 0]] + else: + scores_dev = scores + scores_eval = None + + meds_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles) + + pass + + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.titles_option() +@common_options.eval_option() +@common_options.output_plot_file_option(default_out="demographics_morph.pdf") +@click.option( + "--percentile", + type=float, + help="Percentil of the data used to plot the score distribution box-plots", + default=0.01, +) +@click.pass_context +def morph(ctx, scores, evaluation, output, titles, percentile, **kargs): + """Standard demographics analysis using the MORPH dataset""" + + scores = np.array(scores, dtype="object") + if evaluation: + scores_dev = scores[[i for i in list(range(len(scores))) if i % 2 == 0]] + scores_eval = scores[[i for i in list(range(len(scores))) if i % 2 != 0]] + else: + scores_dev = scores + scores_eval = None + + # Hardcoding the percentile of the score distribution + morph_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles) + + pass + + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.titles_option() +@common_options.eval_option() +@common_options.output_plot_file_option(default_out="demographics_mobio.pdf") +@click.pass_context +@click.option( + "--percentile", + type=float, + help="Percentil of the data used to plot the score distribution box-plots", + default=0.1, +) +def mobio(ctx, scores, evaluation, output, titles, percentile, **kargs): + """Standard demographics analysis using the MOBIO dataset""" + + scores = np.array(scores, dtype="object") + if evaluation: + scores_dev = scores[[i for i in list(range(len(scores))) if i % 2 == 0]] + scores_eval = scores[[i for i in list(range(len(scores))) if i % 2 != 0]] + else: + scores_dev = scores + scores_eval = None + + # Hardcoding the percentile of the score distribution + mobio_report(scores_dev, output, scores_eval, percentile=percentile, titles=titles) + + pass + + +@click.command() +@common_options.scores_argument(nargs=-1) +@common_options.titles_option() +@common_options.output_plot_file_option(default_out="demographics_casia-africa.pdf") +@click.option("--min-age", type=int, help="Minimum age", default=0) +@click.option("--max-age", type=int, help="Maximum age", default=1000) +@click.option( + "--target-demographic", + default=None, + help="Target demographic ethinicity you want to work with. The Casia-Africa database contains data from 49 ethnical groups. If this variable is not set, all of them will be used", +) +@click.option( + "--percentile", + type=float, + help="Percentil of the data used to plot the score distribution box-plots", + default=0.1, +) +@click.pass_context +def casia_africa( + ctx, + scores, + output, + titles, + target_demographic, + min_age, + max_age, + percentile, + **kargs +): + """Standard demographics analysis using the CASIA AFRICA dataset""" + + age_interval = (min_age, max_age) + + # Hardcoding the percentile of the score distribution + + casia_africa_report( + scores, + output, + percentile=percentile, + titles=titles, + age_interval=age_interval, + target_ethnicity=target_demographic, + ) diff --git a/bob/bio/demographics/script/demographics.py b/bob/bio/demographics/script/demographics.py new file mode 100644 index 0000000..4c9ccbd --- /dev/null +++ b/bob/bio/demographics/script/demographics.py @@ -0,0 +1,13 @@ +"""The main entry for bob.bio (click-based) scripts. +""" +import click +import pkg_resources +from click_plugins import with_plugins +from bob.extension.scripts.click_helper import AliasedGroup + + +@with_plugins(pkg_resources.iter_entry_points("bob.bio.demographics.cli")) +@click.group(cls=AliasedGroup) +def demographics(): + """Demographics commands""" + pass diff --git a/experiments/no-processing/casia-africa.py b/experiments/no-processing/casia-africa.py new file mode 100644 index 0000000..285beda --- /dev/null +++ b/experiments/no-processing/casia-africa.py @@ -0,0 +1,116 @@ +# Running a batch of experiments on MEDS + +import os + +from bob.extension import rc + +## PLEASE SET +# bob config set bob.bio.demographics.path [OUTPUT-PATH] +OUTPUT_PATH = ( + "./results" + if rc.get("bob.bio.demographics.path") is None + else rc.get("bob.bio.demographics.path") +) +# In case the pipeline fails +# it tries again for `max_attempts` time +max_attempts = 5 + +# DATABASE +from bob.bio.face.database import CasiaAfricaDatabase + +protocol = "ID-V-All-Ep1" +database = CasiaAfricaDatabase(protocol=protocol) +database_name = "casia-africa" + + +# Baselines + +from bob.bio.face.embeddings.tensorflow import ( + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, +) + +from bob.bio.face.embeddings.pytorch import ( + iresnet100, + iresnet50, +) + +from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100 + +from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline + +baselines = [ + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, + iresnet100, + iresnet50, + arcface_insightFace_lresnet100, + vgg16_oxford_baseline, +] + + +## Running baselines + +from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics + +for b in baselines: + + print(f"Running {b.__name__}....") + + for i in range(max_attempts): + + IDIAP = True + dask_client = None + if IDIAP: + from dask.distributed import Client + + from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster + + cluster = SGEMultipleQueuesCluster(min_jobs=1) + dask_client = Client(cluster) + + ### Running the baseline + output_path = os.path.join( + OUTPUT_PATH, "no-processing", database_name, b.__name__ + ) + + try: + execute_vanilla_biometrics( + b( + annotation_type=database.annotation_type, + fixed_positions=database.fixed_positions, + ), + database, + dask_client, + groups=["dev"], + output=output_path, + write_metadata_scores=True, + checkpoint=True, + dask_partition_size=200, + dask_n_workers=80, + ) + + execute_vanilla_biometrics( + b( + annotation_type=database.annotation_type, + fixed_positions=database.fixed_positions, + ), + database, + dask_client, + groups=["eval"], + output=output_path, + write_metadata_scores=True, + checkpoint=True, + dask_partition_size=200, + dask_n_workers=80, + ) + + except: + print(f"Failed on attempt {i}") + + if dask_client is not None: + dask_client.shutdown() diff --git a/experiments/no-processing/meds.py b/experiments/no-processing/meds.py new file mode 100644 index 0000000..5771ab7 --- /dev/null +++ b/experiments/no-processing/meds.py @@ -0,0 +1,90 @@ +# Running a batch of experiments on MEDS + +import os + +from bob.extension import rc + +## PLEASE SET +# bob config set bob.bio.demographics.path [OUTPUT-PATH] +OUTPUT_PATH = ( + "./results" + if rc.get("bob.bio.demographics.path") is None + else rc.get("bob.bio.demographics.path") +) + + +# DATABASE +from bob.bio.face.database import MEDSDatabase + +protocol = "verification_fold1" +database = MEDSDatabase(protocol=protocol) +database_name = "meds" + + +# Baselines + +from bob.bio.face.embeddings.tensorflow import ( + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, +) + +from bob.bio.face.embeddings.pytorch import ( + iresnet100, + iresnet50, +) + +from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100 + +from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline + + +baselines = [ + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, + iresnet100, + iresnet50, + arcface_insightFace_lresnet100, + vgg16_oxford_baseline, +] + + +## Running baselines + +from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics + +for b in baselines: + + print(f"Running {b.__name__}....") + + IDIAP = True + dask_client = None + if IDIAP: + from dask.distributed import Client + + from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster + + cluster = SGEMultipleQueuesCluster(min_jobs=1) + dask_client = Client(cluster) + + ### Running the baseline + output_path = os.path.join(OUTPUT_PATH, "no-processing", database_name, b.__name__) + execute_vanilla_biometrics( + b( + annotation_type=database.annotation_type, + fixed_positions=database.fixed_positions, + ), + database, + dask_client, + groups=["dev", "eval"], + output=output_path, + write_metadata_scores=True, + checkpoint=False, + dask_partition_size=200, + dask_n_workers=20, + ) + if dask_client is not None: + dask_client.shutdown() diff --git a/experiments/no-processing/mobio.py b/experiments/no-processing/mobio.py new file mode 100644 index 0000000..b1ff81b --- /dev/null +++ b/experiments/no-processing/mobio.py @@ -0,0 +1,95 @@ +# Running a batch of experiments on MOBIO + +import os + +from bob.extension import rc + +## PLEASE SET +# bob config set bob.bio.demographics.path [OUTPUT-PATH] +OUTPUT_PATH = ( + "./results" + if rc.get("bob.bio.demographics.path") is None + else rc.get("bob.bio.demographics.path") +) + + +# DATABASE +from bob.bio.face.database import MobioDatabase + +protocol = "mobile0-male-female" +database = MobioDatabase(protocol=protocol) +database_name = "mobio" + + +# Baselines + +from bob.bio.face.embeddings.tensorflow import ( + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, +) + +from bob.bio.face.embeddings.pytorch import ( + iresnet100, + iresnet50, +) + +from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100 + +from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline + + +baselines = [ + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, + iresnet100, + iresnet50, + arcface_insightFace_lresnet100, + vgg16_oxford_baseline, +] +# baselines = [ +# resnet50_msceleb_arcface_20210521, +# ] + + +## Running baselines + +from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics + +for b in baselines: + + print(f"Running {b.__name__}....") + + IDIAP = True + dask_client = None + if IDIAP: + from dask.distributed import Client + + from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster + + cluster = SGEMultipleQueuesCluster(min_jobs=1) + dask_client = Client(cluster) + + ### Running the baseline + output_path = os.path.join(OUTPUT_PATH, "no-processing", database_name, b.__name__) + execute_vanilla_biometrics( + b( + annotation_type=database.annotation_type, + fixed_positions=database.fixed_positions, + ), + database, + dask_client, + groups=["dev", "eval"], + output=output_path, + write_metadata_scores=True, + checkpoint=False, + dask_partition_size=200, + dask_n_workers=20, + ) + if dask_client is not None: + dask_client.shutdown() + +print("Done!") diff --git a/experiments/no-processing/morph.py b/experiments/no-processing/morph.py new file mode 100644 index 0000000..0ed7bc4 --- /dev/null +++ b/experiments/no-processing/morph.py @@ -0,0 +1,125 @@ +# Running a batch of experiments on MEDS + +import os + +from bob.extension import rc + +## PLEASE SET +# bob config set bob.bio.demographics.path [OUTPUT-PATH] +OUTPUT_PATH = ( + "./results" + if rc.get("bob.bio.demographics.path") is None + else rc.get("bob.bio.demographics.path") +) +# In case the pipeline fails +# it tries again for `max_attempts` time +max_attempts = 5 + +# DATABASE +from bob.bio.face.database import MorphDatabase + +protocol = "verification_fold1" +database = MorphDatabase(protocol=protocol) +database_name = "morph" + + +# Baselines + +from bob.bio.face.embeddings.tensorflow import ( + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, +) + +from bob.bio.face.embeddings.pytorch import ( + iresnet100, + iresnet50, +) + +from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100 + +from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline + +""" +baselines = [ + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, + iresnet100, + iresnet50, + arcface_insightFace_lresnet100, + vgg16_oxford_baseline, +] + +""" + + +baselines = [ + inception_resnet_v2_msceleb_centerloss_2018, + resnet50_msceleb_arcface_2021, +] + + +## Running baselines + +from bob.bio.base.script.vanilla_biometrics import execute_vanilla_biometrics + +for b in baselines: + + print(f"Running {b.__name__}....") + + for i in range(max_attempts): + + IDIAP = True + dask_client = None + if IDIAP: + from dask.distributed import Client + + from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster + + cluster = SGEMultipleQueuesCluster(min_jobs=1) + dask_client = Client(cluster) + + ### Running the baseline + output_path = os.path.join( + OUTPUT_PATH, "no-processing", database_name, b.__name__ + ) + + try: + execute_vanilla_biometrics( + b( + annotation_type=database.annotation_type, + fixed_positions=database.fixed_positions, + ), + database, + dask_client, + groups=["dev"], + output=output_path, + write_metadata_scores=True, + checkpoint=True, + dask_partition_size=200, + dask_n_workers=80, + ) + + execute_vanilla_biometrics( + b( + annotation_type=database.annotation_type, + fixed_positions=database.fixed_positions, + ), + database, + dask_client, + groups=["eval"], + output=output_path, + write_metadata_scores=True, + checkpoint=True, + dask_partition_size=200, + dask_n_workers=80, + ) + + except: + print(f"Failed on attempt {i}") + + if dask_client is not None: + dask_client.shutdown() diff --git a/experiments/score-norm/meds.py b/experiments/score-norm/meds.py new file mode 100644 index 0000000..2fe7648 --- /dev/null +++ b/experiments/score-norm/meds.py @@ -0,0 +1,93 @@ +# Running a batch of experiments on MEDS + +import os + +from bob.extension import rc + +## PLEASE SET +# bob config set bob.bio.demographics.path [OUTPUT-PATH] +OUTPUT_PATH = ( + "./results" + if rc.get("bob.bio.demographics.path") is None + else rc.get("bob.bio.demographics.path") +) +consider_genuines = False +ztnorm_cohort_proportion = 1.0 + + +# DATABASE +from bob.bio.face.database import MEDSDatabase + +protocol = "verification_fold1" +database = MEDSDatabase(protocol=protocol) +database_name = "meds" + + +# Baselines + +from bob.bio.face.embeddings.tensorflow import ( + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, +) + +from bob.bio.face.embeddings.pytorch import ( + iresnet100, + iresnet50, +) + +from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100 + +from bob.bio.face.embeddings.opencv import vgg16_oxford_baseline + + +baselines = [ + inception_resnet_v2_msceleb_centerloss_2018, + facenet_sanderberg_20170512_110547, + resnet50_msceleb_arcface_2021, + resnet50_msceleb_arcface_20210521, + iresnet100, + iresnet50, + arcface_insightFace_lresnet100, + vgg16_oxford_baseline, +] + + +## Running baselines +from bob.bio.base.pipelines.vanilla_biometrics import execute_vanilla_biometrics_ztnorm + +for b in baselines: + + print(f"Running {b.__name__}....") + + IDIAP = True + dask_client = None + if IDIAP: + from dask.distributed import Client + + from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster + + cluster = SGEMultipleQueuesCluster(min_jobs=1) + dask_client = Client(cluster) + + ### Running the baseline + output_path = os.path.join(OUTPUT_PATH, "score-norm", database_name, b.__name__) + execute_vanilla_biometrics_ztnorm( + b( + annotation_type=database.annotation_type, + fixed_positions=database.fixed_positions, + ), + database, + dask_client, + groups=["dev", "eval"], + output=output_path, + write_metadata_scores=True, + checkpoint=True, + dask_partition_size=100, + dask_n_workers=20, + consider_genuines=consider_genuines, + ztnorm_cohort_proportion=ztnorm_cohort_proportion, + ) + if dask_client is not None: + dask_client.shutdown() diff --git a/setup.py b/setup.py index 2465625..e92e428 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,8 @@ # -*- coding: utf-8 -*- from setuptools import setup, dist -dist.Distribution(dict(setup_requires=['bob.extension'])) + +dist.Distribution(dict(setup_requires=["bob.extension"])) from bob.extension.utils import find_packages from bob.extension.utils import load_requirements @@ -11,51 +12,49 @@ install_requires = load_requirements() setup( - - name='bob.bio.demographics', + name="bob.bio.demographics", version=open("version.txt").read().rstrip(), - description='bob.bio.demographics', - - url='https://gitlab.idiap.ch/bob/bob.bio.demographics', - license='BSD', - + description="bob.bio.demographics", + url="https://gitlab.idiap.ch/bob/bob.bio.demographics", + license="BSD", # there may be multiple authors (separate entries by comma) - author='Tiago de Freitas Pereira', - author_email='tiago.pereira@idiap.ch', - + author="Tiago de Freitas Pereira", + author_email="tiago.pereira@idiap.ch", # there may be a maintainer apart from the author - you decide - #maintainer='?', - #maintainer_email='email@example.com', - + # maintainer='?', + # maintainer_email='email@example.com', # you may add more keywords separating those by commas (a, b, c, ...) - keywords = "bob", - - long_description=open('README.rst').read(), - + keywords="bob", + long_description=open("README.rst").read(), # leave this here, it is pretty standard packages=find_packages(), include_package_data=True, - zip_safe = False, - + zip_safe=False, install_requires=install_requires, - entry_points={ - # add entry points (scripts, bob resources here, if any) - }, - + "bob.bio.cli": [ + "demographics = bob.bio.demographics.script.demographics:demographics", + ], + "bob.bio.demographics.cli": [ + "meds = bob.bio.demographics.script.commands:meds", + "morph = bob.bio.demographics.script.commands:morph", + "mobio = bob.bio.demographics.script.commands:mobio", + "casia-africa = bob.bio.demographics.script.commands:casia_africa ", + ], + }, # check classifiers, add and remove as you see fit # full list here: https://pypi.org/classifiers/ # don't remove the Bob framework unless it's not a bob package - classifiers = [ - 'Framework :: Bob', - 'Development Status :: 4 - Beta', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: BSD License', - 'Natural Language :: English', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], + classifiers=[ + "Framework :: Bob", + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", + ], +) -) \ No newline at end of file -- GitLab