Commit 3e174653 authored by Tiago de Freitas Pereira
Benchmarks
Merge request !112: Feature extractors
%% Cell type:markdown id: tags:
# Extracting embedding features from face data
In this notebook, we extract embedding features from images using face recognition extractors.
As an example, we use the MOBIO dataset and extract ArcFace features from the face images:
%% Cell type:code id: tags:
``` python
##### CHANGE YOUR DATABASE HERE
from bob.bio.face.config.database.mobio_male import database

annotation_type = database.annotation_type
fixed_positions = database.fixed_positions
memory_demanding = True
dask_client = None  # set below if you want to run on the SGE grid
```
%% Cell type:code id: tags:
``` python
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
pipeline = arcface_insightFace_lresnet100(annotation_type=annotation_type,
                                          fixed_positions=fixed_positions,
                                          memory_demanding=memory_demanding)
transformer = pipeline.transformer
```
%% Cell type:code id: tags:
``` python
from bob.pipelines import wrap
features_dir = "features" #Path to store extracted features
# Wrapping with CHECKPOINT and DASK
transformer = wrap(["checkpoint","dask"],
transformer,
features_dir=features_dir)
# Printing the setup of the transformer
print(transformer)
```
%% Output
Pipeline(steps=[('ToDaskBag', ToDaskBag()),
('samplewrapper-1',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=FaceCrop(annotator=BobIpMTCNN(),
cropped_image_size=(112,
112),
cropped_positions={'leye': (55,
81),
'reye': (55,
42)}),
fit_extra_arguments=(),
transform_extra_arguments=(('annotations',
'annotations'),)),
features_dir='featur...
save_func=<function save at 0x7f345a1224d0>))),
('samplewrapper-2',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=ArcFaceInsightFace_LResNet100(memory_demanding=True),
fit_extra_arguments=(),
transform_extra_arguments=()),
features_dir='features/samplewrapper-2',
load_func=<function load at 0x7f345a122320>,
save_func=<function save at 0x7f345a1224d0>)))])
%% Cell type:markdown id: tags:
As an example, we consider 10 samples from this database and extract features for these samples:
%% Cell type:code id: tags:
``` python
# get 10 samples from database
samples = database.all_samples()[:10]
```
%% Cell type:code id: tags:
``` python
# Setting the DASK client
# HERE, MAKE ABSOLUTELY SURE THAT YOU RUN `SETSHELL grid`
# BEFORE STARTING THE NOTEBOOK
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster

cluster = SGEMultipleQueuesCluster(min_jobs=1)
dask_client = Client(cluster)
```
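%% Cell type:markdown id: tags:
Optionally (a small convenience, not part of the original workflow), you can print the Dask dashboard URL to monitor the SGE workers while they spin up:
%% Cell type:code id: tags:
``` python
# Optional: URL of the distributed dashboard, handy to watch the workers.
print(dask_client.dashboard_link)
```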
%% Cell type:code id: tags:
``` python
features = transformer.transform(samples)
if dask_client is not None:
    features = features.compute(scheduler=dask_client)
else:
    # no Dask client: compute the lazy dask Bag locally
    features = features.compute(scheduler="single-threaded")
```
%% Cell type:markdown id: tags:
In the following cells, we convert the extracted features to a `numpy.array` and check their shape.
%% Cell type:code id: tags:
``` python
import numpy as np
from bob.pipelines import SampleBatch
np_features = np.array(SampleBatch(features))
```
%% Cell type:code id: tags:
``` python
np_features
```
%% Output
array([[[ 0.5345935 , -1.0668839 , -0.62798595, ..., -0.78859204,
-0.5147211 , 2.1415784 ]],
[[ 0.24587776, -1.1436105 , -0.21513344, ..., -0.4950465 ,
-0.7586405 , 1.9262394 ]],
[[-0.01235329, -1.0903177 , -0.7307515 , ..., -1.5341333 ,
-0.9396954 , 1.8103021 ]],
...,
[[ 0.46007535, -0.9715014 , -0.52703196, ..., -0.29170716,
-0.74297565, 1.8094344 ]],
[[ 0.6113469 , -1.1828535 , -0.19491309, ..., -0.22889124,
-0.58382076, 2.185493 ]],
[[ 0.71980965, -0.4669612 , -0.49327967, ..., 0.0910981 ,
-0.65268064, 0.93472594]]], dtype=float32)
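%% Cell type:markdown id: tags:
A quick shape check (a minimal sketch; the exact dimensions depend on the extractor, but the ArcFace LResNet100 model produces 512-dimensional embeddings, so for the 10 samples above we expect something like `(10, 1, 512)`):
%% Cell type:code id: tags:
``` python
# Shape of the stacked embeddings: (n_samples, n_images_per_sample, embedding_size)
print(np_features.shape)
```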
%% Cell type:code id: tags:
``` python
# KILL THE SGE WORKERS
dask_client.shutdown()
```
%% Output
tornado.application - ERROR - Exception in callback functools.partial(<function TCPServer._handle_connection.<locals>.<lambda> at 0x7f3470e7c8c0>, <Task finished coro=<BaseTCPListener._handle_stream() done, defined at /idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py:445> exception=ValueError('invalid operation on non-started TCPListener')>)
Traceback (most recent call last):
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
ret = callback()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/tornado/tcpserver.py", line 327, in <lambda>
gen.convert_yielded(future), lambda f: f.result()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 451, in _handle_stream
logger.debug("Incoming connection from %r to %r", address, self.contact_address)
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 486, in contact_address
host, port = self.get_host_port()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 467, in get_host_port
self._check_started()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 443, in _check_started
raise ValueError("invalid operation on non-started TCPListener")
ValueError: invalid operation on non-started TCPListener
tornado.application - ERROR - Exception in callback functools.partial(<function TCPServer._handle_connection.<locals>.<lambda> at 0x7f3470e7ce60>, <Task finished coro=<BaseTCPListener._handle_stream() done, defined at /idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py:445> exception=ValueError('invalid operation on non-started TCPListener')>)
Traceback (most recent call last):
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
ret = callback()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/tornado/tcpserver.py", line 327, in <lambda>
gen.convert_yielded(future), lambda f: f.result()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 451, in _handle_stream
logger.debug("Incoming connection from %r to %r", address, self.contact_address)
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 486, in contact_address
host, port = self.get_host_port()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 467, in get_host_port
self._check_started()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 443, in _check_started
raise ValueError("invalid operation on non-started TCPListener")
ValueError: invalid operation on non-started TCPListener
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
concurrent.futures._base.CancelledError
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# Injecting extra samples in vanilla biometrics protocols
Sometimes our experiments go beyond "simple" database protocols.
Sometimes we just want to analyze the impact of a few extra samples on our experiments without writing a whole dataset interface for that.
This notebook shows how to "inject" samples that do not belong to any protocol into an existing protocol.
We'll showcase how to inject samples in order to perform score normalization.
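As a reminder (these are the standard definitions, not something specific to this notebook), Z-norm normalizes the score $s(p, r)$ of a probe $p$ against a reference $r$ using impostor statistics obtained by scoring a cohort of extra probes (the "zprobes") against $r$, while T-norm uses a cohort of extra references (the "treferences") scored against $p$:

$$s_{\text{znorm}}(p, r) = \frac{s(p, r) - \mu_Z(r)}{\sigma_Z(r)}, \qquad s_{\text{tnorm}}(p, r) = \frac{s(p, r) - \mu_T(p)}{\sigma_T(p)}$$

ZT-norm applies both normalizations in sequence, and S-norm (as commonly defined) averages the Z- and T-normalized scores.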
## Preparing the database
We'll showcase how to perform this injection using the MEDS dataset.
%% Cell type:code id: tags:
``` python
dask_client = None

# Output folder for checkpoints and scores, and path to the extra (injected) data
OUTPUT_PATH = "YOUR_TEMP"
PATH_INJECTED_DATA = "/idiap/temp/parzul/db_gen_output/database_neutralized/image/00000/"

##### CHANGE YOUR DATABASE HERE
from bob.bio.face.database import MEDSDatabase

database = MEDSDatabase(protocol="verification_fold1")

# Fetching the keys of the enrolled references (dev and eval) so the injected
# probes can be scored against every model
# references = database.zprobes()[0].references
references = database.probes(group="eval")[0].references + database.probes(group="dev")[0].references
```
%% Cell type:markdown id: tags:
## Loading samples that will be injected
Here we'll inject samples for Z-norm and T-norm.
%% Cell type:code id: tags:
``` python
# PATH
import os
import functools

import bob.io.base
from bob.pipelines import Sample, DelayedSample, SampleSet

# Fetching real data
# treferences = database.treferences()
# zprobes = database.zprobes()

# Fixed eye annotations for the injected images
eyes_annotations = {'leye': (61, 120),
                    'reye': (61, 63)}

treferences_lst = ["0/0_ethnicity_0.png",
                   "0/0_ethnicity_7.png"]

zprobes_lst = ["1/1_ethnicity_0.png",
               "1/1_ethnicity_7.png"]


# Converting every element of a list into a SampleSet
def list_to_sampleset(lst, base_path, eyes_annotations, references):
    sample_sets = []
    for i, l in enumerate(lst):
        sample = DelayedSample(functools.partial(bob.io.base.load, os.path.join(base_path, l)),
                               key=l,
                               reference_id=str(i),
                               annotations=eyes_annotations)
        sset = SampleSet(samples=[sample],
                         key=l,
                         reference_id=str(i),
                         references=references)
        sample_sets.append(sset)
    return sample_sets


treferences = list_to_sampleset(treferences_lst, PATH_INJECTED_DATA, eyes_annotations, references=None)
zprobes = list_to_sampleset(zprobes_lst, PATH_INJECTED_DATA, eyes_annotations, references=references)
```
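%% Cell type:markdown id: tags:
A quick, hypothetical sanity check (not in the original notebook) that the injected sample sets look as expected:
%% Cell type:code id: tags:
``` python
# Each SampleSet should wrap exactly one DelayedSample carrying the eye
# annotations that the face cropper needs.
print(len(treferences), len(zprobes))
print(zprobes[0].samples[0].annotations)
```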
%% Cell type:markdown id: tags:
## Preparing the pipeline
Here we are using the ArcFace model from InsightFace (https://github.com/deepinsight/insightface).
Feel free to change it by looking at `bob.bio.face.embeddings`.
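For instance, a hypothetical swap to a TensorFlow-based extractor could look like the sketch below; the exact helper names depend on your `bob.bio.face` version, so check the `bob.bio.face.embeddings` sub-modules for what is actually available:
``` python
# Hypothetical alternative (assumes this helper exists in your bob.bio.face version):
# from bob.bio.face.embeddings.tensorflow import resnet50_msceleb_arcface_2021
# pipeline = resnet50_msceleb_arcface_2021(annotation_type=database.annotation_type,
#                                          fixed_positions=None,
#                                          memory_demanding=False)
```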
%% Cell type:code id: tags:
``` python
import os
from bob.bio.base.pipelines.vanilla_biometrics import checkpoint_vanilla_biometrics
from bob.bio.base.pipelines.vanilla_biometrics import dask_vanilla_biometrics
from bob.bio.base.pipelines.vanilla_biometrics import ZTNormPipeline, ZTNormCheckpointWrapper
from bob.bio.base.pipelines.vanilla_biometrics import CSVScoreWriter
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
pipeline = arcface_insightFace_lresnet100(annotation_type=database.annotation_type,
                                          fixed_positions=None,
                                          memory_demanding=False)

## SCORE WRITER
# Here we want the pipeline to write the scores using METADATA
pipeline.score_writer = CSVScoreWriter(os.path.join(OUTPUT_PATH, "./tmp"))

# Aggregating with checkpoint
pipeline = checkpoint_vanilla_biometrics(pipeline, OUTPUT_PATH)

# pipeline = dask_vanilla_biometrics(ZTNormCheckpointWrapper(ZTNormPipeline(pipeline), OUTPUT_PATH))

# AGGREGATING WITH ZTNORM
pipeline = ZTNormPipeline(pipeline)
pipeline.ztnorm_solver = ZTNormCheckpointWrapper(
    pipeline.ztnorm_solver, os.path.join(OUTPUT_PATH, "normed-scores")
)
pipeline = dask_vanilla_biometrics(pipeline, partition_size=200)
print(pipeline.transformer)
```
%% Output
Pipeline(steps=[('ToDaskBag', ToDaskBag(partition_size=200)),
('samplewrapper-1',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=FaceCrop(annotator=BobIpMTCNN(),
cropped_image_size=(112,
112),
cropped_positions={'leye': (55,
81),
'reye': (55,
42)}),
fit_extra_arguments=(),
transform_extra_arguments=(('annotations',
'annotations'),)),
fe...
save_func=<function save at 0x7fccf501c560>))),
('samplewrapper-2',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=ArcFaceInsightFace_LResNet100(),
fit_extra_arguments=(),
transform_extra_arguments=()),
features_dir='/idiap/temp/tpereira/inject-example/samplewrapper-2',
load_func=<function load at 0x7fccf501c3b0>,
save_func=<function save at 0x7fccf501c560>)))])
%% Cell type:markdown id: tags:
## Setting the DASK client (optional step; do it if you want to use the grid)
**HERE, MAKE ABSOLUTELY SURE THAT YOU RUN `SETSHELL grid` BEFORE STARTING THE NOTEBOOK**
%% Cell type:code id: tags:
``` python
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
cluster = SGEMultipleQueuesCluster(min_jobs=1)
dask_client = Client(cluster)
```
%% Cell type:markdown id: tags:
## Running the Vanilla Biometrics pipeline
%% Cell type:code id: tags:
``` python
import os

import dask.bag
from dask.delayed import Delayed


def post_process_scores(pipeline, scores, path):
    written_scores = pipeline.write_scores(scores)
    return pipeline.post_process(written_scores, path)


def _build_filename(score_file_name, suffix):
    return os.path.join(score_file_name, suffix)


def compute_scores(result, dask_client):
    if isinstance(result, (Delayed, dask.bag.Bag)):
        if dask_client is not None:
            result = result.compute(scheduler=dask_client)
        else:
            print("`dask_client` not set. Your pipeline will run locally")
            result = result.compute(scheduler="single-threaded")
    return result


background_model_samples = database.background_model_samples()

for group in ["dev", "eval"]:

    score_file_name = os.path.join(OUTPUT_PATH, f"scores-{group}")
    biometric_references = database.references(group=group)
    probes = database.probes(group=group)

    (
        raw_scores,
        z_normed_scores,
        t_normed_scores,
        zt_normed_scores,
        s_normed_scores,
    ) = pipeline(
        background_model_samples,
        biometric_references,
        probes,
        zprobes,
        treferences,
        allow_scoring_with_all_biometric_references=True,
    )

    # RAW SCORES
    raw_scores = post_process_scores(
        pipeline, raw_scores, _build_filename(score_file_name, "raw_scores")
    )
    _ = compute_scores(raw_scores, dask_client)

    # Z-SCORES
    z_normed_scores = post_process_scores(
        pipeline,
        z_normed_scores,
        _build_filename(score_file_name, "z_normed_scores"),
    )
    _ = compute_scores(z_normed_scores, dask_client)

    # T-SCORES
    t_normed_scores = post_process_scores(
        pipeline,
        t_normed_scores,
        _build_filename(score_file_name, "t_normed_scores"),
    )
    _ = compute_scores(t_normed_scores, dask_client)

    # S-SCORES
    s_normed_scores = post_process_scores(
        pipeline,
        s_normed_scores,
        _build_filename(score_file_name, "s_normed_scores"),
    )
    _ = compute_scores(s_normed_scores, dask_client)

    # ZT-SCORES
    zt_normed_scores = post_process_scores(
        pipeline,
        zt_normed_scores,
        _build_filename(score_file_name, "zt_normed_scores"),
    )
    _ = compute_scores(zt_normed_scores, dask_client)
```
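%% Cell type:markdown id: tags:
Optionally (a hypothetical sanity check, not part of the original notebook), you can list what was written for one of the groups; the exact layout under each folder depends on the `CSVScoreWriter` version:
%% Cell type:code id: tags:
``` python
import glob
# List everything the pipeline wrote for the "dev" group (raw and normalized scores).
for path in sorted(glob.glob(os.path.join(OUTPUT_PATH, "scores-dev", "*"))):
    print(path)
```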
%% Cell type:markdown id: tags:
Finally, we shut down the Dask client, which also kills the SGE workers.
%% Cell type:code id: tags:
``` python
# KILL THE SGE WORKERS
dask_client.shutdown()
```
%% Cell type:code id: tags:
``` python
```