From e9d32305bdbc7c38481e474ca6b1cb9d5c39cb7f Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Mon, 7 Jun 2021 14:46:26 +0200
Subject: [PATCH] Inject samples example

---
 notebooks/inject_samples.ipynb | 350 +++++++++++++++++++++++++++++++++
 1 file changed, 350 insertions(+)
 create mode 100644 notebooks/inject_samples.ipynb

diff --git a/notebooks/inject_samples.ipynb b/notebooks/inject_samples.ipynb
new file mode 100644
index 00000000..52c2812a
--- /dev/null
+++ b/notebooks/inject_samples.ipynb
@@ -0,0 +1,350 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Injecting extra samples in vanilla biometrics protocols\n",
+ "\n",
+ "Sometimes our experiments go beyond \"simple\" database protocols.\n",
+ "Sometimes we just want to analyze the impact of some extra samples on our experiments without writing a whole dataset interface for that.\n",
+ "\n",
+ "This notebook shows how to \"inject\" samples that don't belong to any protocol into an existing protocol.\n",
+ "We'll showcase how to inject samples to perform score normalization.\n",
+ "\n",
+ "## Preparing the database\n",
+ "\n",
+ "We'll demonstrate this injection using the MEDS dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dask_client = None\n",
+ "\n",
+ "OUTPUT_PATH = \"\"  # where checkpoints and scores will be written\n",
+ "PATH_INJECTED_DATA = \"\"  # base directory containing the extra images to inject\n",
+ "\n",
+ "\n",
+ "##### CHANGE YOUR DATABASE HERE\n",
+ "from bob.bio.face.database import MEDSDatabase\n",
+ "\n",
+ "database = MEDSDatabase(protocol=\"verification_fold1\")\n",
+ "\n",
+ "# Fetching the keys of the biometric references\n",
+ "#references = database.zprobes()[0].references\n",
+ "references = database.probes(group=\"eval\")[0].references + database.probes(group=\"dev\")[0].references\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Loading the samples that will be injected\n",
+ "\n",
+ "Here we'll load the extra samples that will be injected as Z-norm probes and T-norm references."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import functools\n",
+ "import bob.io.base\n",
+ "\n",
+ "# For a regular protocol, the z/t-norm samples would come from the database itself:\n",
+ "#treferences = database.treferences()\n",
+ "#zprobes = database.zprobes()\n",
+ "\n",
+ "eyes_annotations = {'leye': (61, 120),\n",
+ "                    'reye': (61, 63)}\n",
+ "\n",
+ "\n",
+ "treferences_lst = [\"0/0_ethnicity_0.png\",\n",
+ "                   \"0/0_ethnicity_7.png\"]\n",
+ "\n",
+ "zprobes_lst = [\"1/1_ethnicity_0.png\",\n",
+ "               \"1/1_ethnicity_7.png\"]\n",
+ "\n",
+ "from bob.pipelines import DelayedSample, SampleSet\n",
+ "\n",
+ "# Convert every element of a list into a SampleSet holding one DelayedSample\n",
+ "def list_to_sampleset(lst, base_path, eyes_annotations, references):\n",
+ "    sample_sets = []\n",
+ "    for i, l in enumerate(lst):\n",
+ "        sample = DelayedSample(functools.partial(bob.io.base.load, os.path.join(base_path, l)),\n",
+ "                               key=l,\n",
+ "                               reference_id=str(i),\n",
+ "                               annotations=eyes_annotations\n",
+ "                               )\n",
+ "        sset = SampleSet(samples=[sample],\n",
+ "                         key=l,\n",
+ "                         reference_id=str(i),\n",
+ "                         references=references)\n",
+ "\n",
+ "        sample_sets.append(sset)\n",
+ "    return sample_sets\n",
+ "\n",
+ "\n",
+ "treferences = list_to_sampleset(treferences_lst, PATH_INJECTED_DATA, eyes_annotations, references=None)\n",
+ "zprobes = list_to_sampleset(zprobes_lst, PATH_INJECTED_DATA, eyes_annotations, references=references)\n"
+ ]
+ },
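+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check, we can peek at one of the injected sample sets. This cell is an optional, illustrative addition; it assumes `PATH_INJECTED_DATA` points at existing image files:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative sanity check of the injected sample sets (optional)\n",
+ "print(f\"{len(zprobes)} z-probe sample sets, {len(treferences)} t-reference sample sets\")\n",
+ "sset = zprobes[0]\n",
+ "print(\"key:\", sset.key, \"- reference_id:\", sset.reference_id)\n",
+ "# Loading is delayed: the image is only read from disk when `.data` is accessed\n",
+ "print(\"image shape:\", sset.samples[0].data.shape)\n"
+ ]
+ },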
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "## Preparing the pipeline\n",
+ "\n",
+ "Here we are using the ArcFace model from InsightFace (https://github.com/deepinsight/insightface).\n",
+ "Feel free to change it by looking at `bob.bio.face.embeddings`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Pipeline(steps=[('ToDaskBag', ToDaskBag(partition_size=200)),\n",
+ " ('samplewrapper-1',\n",
+ " DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=FaceCrop(annotator=BobIpMTCNN(),\n",
+ " cropped_image_size=(112,\n",
+ " 112),\n",
+ " cropped_positions={'leye': (55,\n",
+ " 81),\n",
+ " 'reye': (55,\n",
+ " 42)}),\n",
+ " fit_extra_arguments=(),\n",
+ " transform_extra_arguments=(('annotations',\n",
+ " 'annotations'),)),\n",
+ " fe...\n",
+ " save_func=<function save at 0x7fccf501c560>))),\n",
+ " ('samplewrapper-2',\n",
+ " DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=ArcFaceInsightFace_LResNet100(),\n",
+ " fit_extra_arguments=(),\n",
+ " transform_extra_arguments=()),\n",
+ " features_dir='/idiap/temp/tpereira/inject-example/samplewrapper-2',\n",
+ " load_func=<function load at 0x7fccf501c3b0>,\n",
+ " save_func=<function save at 0x7fccf501c560>)))])\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "from bob.bio.base.pipelines.vanilla_biometrics import checkpoint_vanilla_biometrics\n",
+ "from bob.bio.base.pipelines.vanilla_biometrics import dask_vanilla_biometrics\n",
+ "from bob.bio.base.pipelines.vanilla_biometrics import ZTNormPipeline, ZTNormCheckpointWrapper\n",
+ "from bob.bio.base.pipelines.vanilla_biometrics import CSVScoreWriter\n",
+ "\n",
+ "from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100\n",
+ "pipeline = arcface_insightFace_lresnet100(annotation_type=database.annotation_type,\n",
+ "                                          fixed_positions=None,\n",
+ "                                          memory_demanding=False)\n",
+ "\n",
+ "\n",
+ "# SCORE WRITER\n",
+ "# Here we want the pipeline to write the scores together with their metadata\n",
+ "pipeline.score_writer = CSVScoreWriter(os.path.join(OUTPUT_PATH, \"./tmp\"))\n",
+ "\n",
+ "\n",
+ "# Wrapping the pipeline with checkpointing, so features are cached under OUTPUT_PATH\n",
+ "pipeline = checkpoint_vanilla_biometrics(pipeline, OUTPUT_PATH)\n",
+ "\n",
+ "\n",
+ "# AGGREGATING WITH ZTNORM\n",
+ "pipeline = ZTNormPipeline(pipeline)\n",
+ "pipeline.ztnorm_solver = ZTNormCheckpointWrapper(\n",
+ "    pipeline.ztnorm_solver, os.path.join(OUTPUT_PATH, \"normed-scores\")\n",
+ ")\n",
+ "pipeline = dask_vanilla_biometrics(pipeline, partition_size=200)\n",
+ "\n",
+ "print(pipeline.transformer)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setting the Dask client (optional step; do it if you want to use the grid)\n",
+ "\n",
+ "**HERE MAKE ABSOLUTELY SURE THAT YOU DO `SETSHELL grid` BEFORE STARTING THE NOTEBOOK**\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from dask.distributed import Client\n",
+ "from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster\n",
+ "\n",
+ "cluster = SGEMultipleQueuesCluster(min_jobs=1)\n",
+ "dask_client = Client(cluster)"
+ ]
+ },
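+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you don't have access to an SGE grid, a plain local Dask client works as well. The cell below is an optional, illustrative sketch (the `n_workers` value is an arbitrary choice):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional alternative (illustrative): run on the local machine instead of SGE.\n",
+ "# Uncomment to use a local Dask cluster; leaving `dask_client = None` makes the\n",
+ "# pipeline fall back to the single-threaded scheduler instead.\n",
+ "#from dask.distributed import Client\n",
+ "#dask_client = Client(n_workers=4, threads_per_worker=1)\n"
+ ]
+ },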
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Running the vanilla biometrics pipeline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "def post_process_scores(pipeline, scores, path):\n",
+ "    written_scores = pipeline.write_scores(scores)\n",
+ "    return pipeline.post_process(written_scores, path)\n",
+ "\n",
+ "def _build_filename(score_file_name, suffix):\n",
+ "    return os.path.join(score_file_name, suffix)\n",
+ "\n",
+ "from dask.delayed import Delayed\n",
+ "import dask.bag\n",
+ "\n",
+ "def compute_scores(result, dask_client):\n",
+ "    if isinstance(result, (Delayed, dask.bag.Bag)):\n",
+ "        if dask_client is not None:\n",
+ "            result = result.compute(scheduler=dask_client)\n",
+ "        else:\n",
+ "            print(\"`dask_client` not set. Your pipeline will run locally\")\n",
+ "            result = result.compute(scheduler=\"single-threaded\")\n",
+ "    return result\n",
+ "\n",
+ "background_model_samples = database.background_model_samples()\n",
+ "for group in [\"dev\", \"eval\"]:\n",
+ "\n",
+ "    score_file_name = os.path.join(OUTPUT_PATH, f\"scores-{group}\")\n",
+ "    biometric_references = database.references(group=group)\n",
+ "    probes = database.probes(group=group)\n",
+ "\n",
+ "    (\n",
+ "        raw_scores,\n",
+ "        z_normed_scores,\n",
+ "        t_normed_scores,\n",
+ "        zt_normed_scores,\n",
+ "        s_normed_scores,\n",
+ "    ) = pipeline(\n",
+ "        background_model_samples,\n",
+ "        biometric_references,\n",
+ "        probes,\n",
+ "        zprobes,\n",
+ "        treferences,\n",
+ "        allow_scoring_with_all_biometric_references=True,\n",
+ "    )\n",
+ "\n",
+ "    # RAW SCORES\n",
+ "    raw_scores = post_process_scores(\n",
+ "        pipeline, raw_scores, _build_filename(score_file_name, \"raw_scores\")\n",
+ "    )\n",
+ "    _ = compute_scores(raw_scores, dask_client)\n",
+ "\n",
+ "    # Z-SCORES\n",
+ "    z_normed_scores = post_process_scores(\n",
+ "        pipeline,\n",
+ "        z_normed_scores,\n",
+ "        _build_filename(score_file_name, \"z_normed_scores\"),\n",
+ "    )\n",
+ "    _ = compute_scores(z_normed_scores, dask_client)\n",
+ "\n",
+ "    # T-SCORES\n",
+ "    t_normed_scores = post_process_scores(\n",
+ "        pipeline,\n",
+ "        t_normed_scores,\n",
+ "        _build_filename(score_file_name, \"t_normed_scores\"),\n",
+ "    )\n",
+ "    _ = compute_scores(t_normed_scores, dask_client)\n",
+ "\n",
+ "    # S-SCORES\n",
+ "    s_normed_scores = post_process_scores(\n",
+ "        pipeline,\n",
+ "        s_normed_scores,\n",
+ "        _build_filename(score_file_name, \"s_normed_scores\"),\n",
+ "    )\n",
+ "    _ = compute_scores(s_normed_scores, dask_client)\n",
+ "\n",
+ "    # ZT-SCORES\n",
+ "    zt_normed_scores = post_process_scores(\n",
+ "        pipeline,\n",
+ "        zt_normed_scores,\n",
+ "        _build_filename(score_file_name, \"zt_normed_scores\"),\n",
+ "    )\n",
+ "    _ = compute_scores(zt_normed_scores, dask_client)\n"
+ ]
+ },
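+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As an optional, illustrative check (assuming the scoring loop above completed), the CSV files written by the `CSVScoreWriter` can be loaded for inspection, e.g. with pandas. The exact file layout under `OUTPUT_PATH` may differ, so adjust the glob pattern if needed:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative only: peek at the raw dev scores.\n",
+ "# The path below is an assumption; adjust it if your layout differs.\n",
+ "import glob\n",
+ "import pandas as pd\n",
+ "\n",
+ "csv_files = glob.glob(os.path.join(OUTPUT_PATH, \"scores-dev\", \"raw_scores\", \"*\"))\n",
+ "if csv_files:\n",
+ "    print(pd.read_csv(csv_files[0]).head())\n"
+ ]
+ },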
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we shut down the Dask client and release the SGE workers."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# KILL THE SGE WORKERS\n",
+ "dask_client.shutdown()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
-- 
GitLab