From e9d32305bdbc7c38481e474ca6b1cb9d5c39cb7f Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Mon, 7 Jun 2021 14:46:26 +0200
Subject: [PATCH] Inject samples example

---
 notebooks/inject_samples.ipynb | 350 +++++++++++++++++++++++++++++++++
 1 file changed, 350 insertions(+)
 create mode 100644 notebooks/inject_samples.ipynb

diff --git a/notebooks/inject_samples.ipynb b/notebooks/inject_samples.ipynb
new file mode 100644
index 00000000..52c2812a
--- /dev/null
+++ b/notebooks/inject_samples.ipynb
@@ -0,0 +1,350 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Injecting extra samples in vanilla biometrics protocols\n",
+    "\n",
+    "Sometimes our experiments go beyond \"simple\" database protocols.\n",
+    "Sometimes we just want to analyze the impact of some extra samples in our experiments without writing a whole dataset intergace for that.\n",
+    "\n",
+    "This notebook shows how to \"inject\" samples that doesn't belong to any protocol to some existing protocol.\n",
+    "We'll show case how to inject samples to perform score normalization.\n",
+    "\n",
+    "## Preparing the database\n",
+    "\n",
+    "We'll show case how to perform this injection using the MEDS dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dask_client = None\n",
+    "\n",
+    "OUTPUT_PATH = \"\"\n",
+    "PATH_INJECTED_DATA = \"\"\n",
+    "\n",
+    "\n",
+    "##### CHANGE YOUR DATABASE HERE\n",
+    "from bob.bio.face.database import MEDSDatabase\n",
+    "\n",
+    "database = MEDSDatabase(protocol=\"verification_fold1\")\n",
+    "\n",
+    "# Fetching the keys\n",
+    "#references = database.zprobes()[0].references\n",
+    "references = database.probes(group=\"eval\")[0].references + database.probes(group=\"dev\")[0].references\n"
+   ]
+  },
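+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check (optional), we can inspect the references we just fetched. A minimal sketch, assuming the cell above ran successfully:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check: the injected probes will be scored against these references\n",
+    "print(f\"Fetched {len(references)} references\")\n",
+    "print(references[:5])"
+   ]
+  },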
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Loading samples that will be injected\n",
+    "\n",
+    "Here we'll inject samples for znorm and tnorm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# PATH\n",
+    "import os\n",
+    "import functools\n",
+    "import bob.io.base\n",
+    "# Fetching real data\n",
+    "#treferences = database.treferences()\n",
+    "#zprobes = database.zprobes()\n",
+    "\n",
+    "eyes_annotations={'leye': (61, 120),\n",
+    "                  'reye': (61, 63)}\n",
+    "\n",
+    "\n",
+    "treferences_lst = [\"0/0_ethnicity_0.png\",\n",
+    "                   \"0/0_ethnicity_7.png\"]\n",
+    "\n",
+    "zprobes_lst = [\"1/1_ethnicity_0.png\",\n",
+    "               \"1/1_ethnicity_7.png\"]\n",
+    "\n",
+    "from bob.pipelines import Sample, DelayedSample, SampleSet\n",
+    "\n",
+    "# Converting every element in a list in a sample set\n",
+    "def list_to_sampleset(lst, base_path, eyes_annotations, references):\n",
+    "    sample_sets = []\n",
+    "    for i,l in enumerate(lst):\n",
+    "        sample = DelayedSample(functools.partial(bob.io.base.load,os.path.join(base_path,l)),\n",
+    "                               key=l,\n",
+    "                               reference_id=str(i),\n",
+    "                               annotations=eyes_annotations\n",
+    "                                )\n",
+    "        sset = SampleSet(samples=[sample],\n",
+    "                         key=l,\n",
+    "                         reference_id=str(i),\n",
+    "                         references=references)\n",
+    "\n",
+    "        sample_sets.append(sset)\n",
+    "    return sample_sets\n",
+    "\n",
+    "\n",
+    "treferences = list_to_sampleset(treferences_lst, PATH_INJECTED_DATA,eyes_annotations, references=None)\n",
+    "zprobes = list_to_sampleset(zprobes_lst, PATH_INJECTED_DATA, eyes_annotations, references=references)\n"
+   ]
+  },
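+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before plugging them into the pipeline, it is worth double-checking that the injected `SampleSet`s carry the keys and `reference_id`s we expect. A small sketch:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Each injected file became one SampleSet holding a single DelayedSample\n",
+    "for sset in treferences + zprobes:\n",
+    "    print(sset.key, sset.reference_id, len(sset.samples))"
+   ]
+  },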
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "## Preparing the pipeline\n",
+    "\n",
+    "Here we are using the arcface from insight face (https://github.com/deepinsight/insightface).\n",
+    "Feel free to change it by looking at (`bob.bio.face.embeddings`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Pipeline(steps=[('ToDaskBag', ToDaskBag(partition_size=200)),\n",
+      "                ('samplewrapper-1',\n",
+      "                 DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=FaceCrop(annotator=BobIpMTCNN(),\n",
+      "                                                                                                    cropped_image_size=(112,\n",
+      "                                                                                                                        112),\n",
+      "                                                                                                    cropped_positions={'leye': (55,\n",
+      "                                                                                                                                81),\n",
+      "                                                                                                                       'reye': (55,\n",
+      "                                                                                                                                42)}),\n",
+      "                                                                                 fit_extra_arguments=(),\n",
+      "                                                                                 transform_extra_arguments=(('annotations',\n",
+      "                                                                                                             'annotations'),)),\n",
+      "                                                         fe...\n",
+      "                                                         save_func=<function save at 0x7fccf501c560>))),\n",
+      "                ('samplewrapper-2',\n",
+      "                 DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=ArcFaceInsightFace_LResNet100(),\n",
+      "                                                                                 fit_extra_arguments=(),\n",
+      "                                                                                 transform_extra_arguments=()),\n",
+      "                                                         features_dir='/idiap/temp/tpereira/inject-example/samplewrapper-2',\n",
+      "                                                         load_func=<function load at 0x7fccf501c3b0>,\n",
+      "                                                         save_func=<function save at 0x7fccf501c560>)))])\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from bob.bio.base.pipelines.vanilla_biometrics import checkpoint_vanilla_biometrics\n",
+    "from bob.bio.base.pipelines.vanilla_biometrics import dask_vanilla_biometrics\n",
+    "from bob.bio.base.pipelines.vanilla_biometrics import ZTNormPipeline, ZTNormCheckpointWrapper\n",
+    "from bob.bio.base.pipelines.vanilla_biometrics import CSVScoreWriter\n",
+    "\n",
+    "from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100\n",
+    "pipeline = arcface_insightFace_lresnet100(annotation_type=database.annotation_type,\n",
+    "                                          fixed_positions=None,\n",
+    "                                          memory_demanding=False)\n",
+    "\n",
+    "\n",
+    "## SCORE WRITER\n",
+    "# Here we want the pipeline to write using METADATA\n",
+    "pipeline.score_writer = CSVScoreWriter(os.path.join(OUTPUT_PATH, \"./tmp\"))\n",
+    "\n",
+    "\n",
+    "# Agregating with checkpoint\n",
+    "pipeline = checkpoint_vanilla_biometrics(pipeline, OUTPUT_PATH)\n",
+    "\n",
+    "\n",
+    "#pipeline = dask_vanilla_biometrics(ZTNormCheckpointWrapper(ZTNormPipeline(pipeline), OUTPUT_PATH))\n",
+    "# AGGREGATING WITH ZTNORM\n",
+    "pipeline = ZTNormPipeline(pipeline)\n",
+    "pipeline.ztnorm_solver = ZTNormCheckpointWrapper(\n",
+    "    pipeline.ztnorm_solver, os.path.join(OUTPUT_PATH, \"normed-scores\")\n",
+    ")\n",
+    "pipeline = dask_vanilla_biometrics(pipeline, partition_size=200)\n",
+    "\n",
+    "print(pipeline.transformer)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setting the DASK client (optional step; do it if you want to use the grid)\n",
+    "\n",
+    "**HERE MAKE ABSOLUTELLY SURE THAT YOU DO `SETSHELL grid`  BEFORE STARTING THE NOTEBOOK**\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dask.distributed import Client\n",
+    "from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster\n",
+    "\n",
+    "cluster = SGEMultipleQueuesCluster(min_jobs=1)\n",
+    "dask_client = Client(cluster)"
+   ]
+  },
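+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you don't have access to the SGE grid, a local Dask cluster works as well. Run the cell below **instead of** the previous one (a sketch using `dask.distributed.LocalCluster`; tune the worker counts to your machine):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Alternative: a local Dask cluster (run this INSTEAD of the SGE cell above)\n",
+    "from dask.distributed import Client, LocalCluster\n",
+    "\n",
+    "cluster = LocalCluster(n_workers=4, threads_per_worker=1)\n",
+    "dask_client = Client(cluster)"
+   ]
+  },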
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an example, we consider 10 samples from this database and extract features for these samples:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Running the vanilla Biometrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "def post_process_scores(pipeline, scores, path):\n",
+    "    written_scores = pipeline.write_scores(scores)\n",
+    "    return pipeline.post_process(written_scores, path)    \n",
+    "\n",
+    "def _build_filename(score_file_name, suffix):\n",
+    "    return os.path.join(score_file_name, suffix)\n",
+    "\n",
+    "from dask.delayed import Delayed\n",
+    "import dask.bag\n",
+    "def compute_scores(result, dask_client):\n",
+    "    if isinstance(result, Delayed) or isinstance(result, dask.bag.Bag):\n",
+    "        if dask_client is not None:\n",
+    "            result = result.compute(scheduler=dask_client)\n",
+    "        else:\n",
+    "            print(\"`dask_client` not set. Your pipeline will run locally\")\n",
+    "            result = result.compute(scheduler=\"single-threaded\")\n",
+    "    return result\n",
+    "\n",
+    "background_model_samples = database.background_model_samples()\n",
+    "for group in [\"dev\",\"eval\"]:    \n",
+    "\n",
+    "    score_file_name = os.path.join(OUTPUT_PATH, f\"scores-{group}\")\n",
+    "    biometric_references = database.references(group=group)\n",
+    "    probes = database.probes(group=group)\n",
+    "    \n",
+    "    (\n",
+    "        raw_scores,\n",
+    "        z_normed_scores,\n",
+    "        t_normed_scores,\n",
+    "        zt_normed_scores,\n",
+    "        s_normed_scores,\n",
+    "    ) = pipeline(\n",
+    "        background_model_samples,\n",
+    "        biometric_references,\n",
+    "        probes,\n",
+    "        zprobes,\n",
+    "        treferences,\n",
+    "        allow_scoring_with_all_biometric_references=True,\n",
+    "    )        \n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "\n",
+    "    # Running RAW_SCORES\n",
+    "\n",
+    "    raw_scores = post_process_scores(\n",
+    "        pipeline, raw_scores, _build_filename(score_file_name, \"raw_scores\")\n",
+    "    )\n",
+    "    _ = compute_scores(raw_scores, dask_client)\n",
+    "\n",
+    "    # Z-SCORES\n",
+    "    z_normed_scores = post_process_scores(\n",
+    "        pipeline,\n",
+    "        z_normed_scores,\n",
+    "        _build_filename(score_file_name, \"z_normed_scores\"),\n",
+    "    )\n",
+    "    _ = compute_scores(z_normed_scores, dask_client)\n",
+    "\n",
+    "    # T-SCORES\n",
+    "    t_normed_scores = post_process_scores(\n",
+    "        pipeline,\n",
+    "        t_normed_scores,\n",
+    "        _build_filename(score_file_name, \"t_normed_scores\"),\n",
+    "    )\n",
+    "    _ = compute_scores(t_normed_scores, dask_client)\n",
+    "\n",
+    "    # S-SCORES\n",
+    "    s_normed_scores = post_process_scores(\n",
+    "        pipeline,\n",
+    "        s_normed_scores,\n",
+    "        _build_filename(score_file_name, \"s_normed_scores\"),\n",
+    "    )\n",
+    "    _ = compute_scores(s_normed_scores, dask_client)\n",
+    "\n",
+    "    # ZT-SCORES\n",
+    "    zt_normed_scores = post_process_scores(\n",
+    "        pipeline,\n",
+    "        zt_normed_scores,\n",
+    "        _build_filename(score_file_name, \"zt_normed_scores\"),\n",
+    "    )\n",
+    "    _ = compute_scores(zt_normed_scores, dask_client)\n",
+    "\n"
+   ]
+  },
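+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once the loop finishes, the post-processed scores live under `OUTPUT_PATH`. The sketch below just lists what was written; whether each entry is a single CSV file or a directory of chunks depends on the `CSVScoreWriter` configuration, so treat the exact layout as an assumption:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List the score files written by the pipeline for each group.\n",
+    "# NOTE: the exact layout (file vs. directory of chunks) is an assumption;\n",
+    "# adapt the pattern to what CSVScoreWriter produced in your run.\n",
+    "import glob\n",
+    "import os\n",
+    "\n",
+    "for group in [\"dev\", \"eval\"]:\n",
+    "    pattern = os.path.join(OUTPUT_PATH, f\"scores-{group}\", \"*\")\n",
+    "    print(group, sorted(glob.glob(pattern)))"
+   ]
+  },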
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the following cells, we convert the extracted features to `numpy.array` and check the size of features."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# KILL THE SGE WORKERS\n",
+    "dask_client.shutdown()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
-- 
GitLab