Commit 3e174653 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Benchmarks

parent 800b8e49
Pipeline #51377 failed with stage
in 52 minutes and 21 seconds
%% Cell type:markdown id: tags:
# Extracting embedding features from face data
In this notebook, we aim to extract embedding features from images using face recognition extractors.
As an example, we use the MOBIO dataset and extract ArcFace features from the face images:
%% Cell type:code id: tags:
``` python
##### CHANGE YOUR DATABASE HERE
# Swap this import to point the notebook at a different database configuration.
from bob.bio.face.config.database.mobio_male import database
# Annotation settings are read from the database object and passed to the
# extractor factory in the next cell.
annotation_type = database.annotation_type
fixed_positions = database.fixed_positions
# Passed as `memory_demanding` to the extractor factory in the next cell.
memory_demanding = True
# No Dask client by default; the cluster cell further down may assign one.
dask_client = None
```
%% Cell type:code id: tags:
``` python
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
# Build the ArcFace pipeline from the settings chosen in the configuration cell.
pipeline = arcface_insightFace_lresnet100(
    annotation_type=annotation_type,
    fixed_positions=fixed_positions,
    memory_demanding=memory_demanding,
)

# Only the transformer part of the pipeline is used for feature extraction.
transformer = pipeline.transformer
```
%% Cell type:code id: tags:
``` python
from bob.pipelines import wrap
# Path to store extracted features
features_dir = "features"

# Wrapping with CHECKPOINT and DASK
transformer = wrap(
    ["checkpoint", "dask"],
    transformer,
    features_dir=features_dir,
)

# Printing the setup of the transformer
print(transformer)
```
%%%% Output: stream
Pipeline(steps=[('ToDaskBag', ToDaskBag()),
('samplewrapper-1',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=FaceCrop(annotator=BobIpMTCNN(),
cropped_image_size=(112,
112),
cropped_positions={'leye': (55,
81),
'reye': (55,
42)}),
fit_extra_arguments=(),
transform_extra_arguments=(('annotations',
'annotations'),)),
features_dir='featur...
save_func=<function save at 0x7f345a1224d0>))),
('samplewrapper-2',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=ArcFaceInsightFace_LResNet100(memory_demanding=True),
fit_extra_arguments=(),
transform_extra_arguments=()),
features_dir='features/samplewrapper-2',
load_func=<function load at 0x7f345a122320>,
save_func=<function save at 0x7f345a1224d0>)))])
%% Cell type:markdown id: tags:
As an example, we consider 10 samples from this database and extract features for these samples:
%% Cell type:code id: tags:
``` python
# get 10 samples from database
# (`all_samples()` is sliced so that only its first 10 entries are kept)
samples = database.all_samples()[:10]
```
%% Cell type:code id: tags:
``` python
# Setting the DASK client
# HERE MAKE ABSOLUTELY SURE THAT YOU DO `SETSHELL grid`
# BEFORE STARTING THE NOTEBOOK
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
# Create the SGE cluster (min_jobs=1) and connect a Dask client to it.
cluster = SGEMultipleQueuesCluster(min_jobs=1)
# Rebinding `dask_client` makes the next cell compute on this cluster.
dask_client = Client(cluster)
```
%% Cell type:code id: tags:
``` python
# The transformer is dask-wrapped, so `transform` only builds a lazy
# collection; it has to be computed before the features can be used.
features = transformer.transform(samples)

if dask_client is not None:
    # Run on the distributed cluster the client is connected to.
    features = features.compute(scheduler=dask_client)
else:
    # Without a client the result used to be left uncomputed, which breaks
    # the conversion to numpy further down. Fall back to a local run.
    print("`dask_client` not set. Your pipeline will run locally")
    features = features.compute(scheduler="single-threaded")
```
%% Cell type:markdown id: tags:
In the following cells, we convert the extracted features to `numpy.array` and check the size of features.
%% Cell type:code id: tags:
``` python
import numpy as np
from bob.pipelines import SampleBatch
# Wrap the samples in a SampleBatch and convert that to a NumPy array.
np_features = np.array(SampleBatch(features))
```
%% Cell type:code id: tags:
``` python
# Bare expression: the notebook shows the array as this cell's output.
np_features
```
%%%% Output: execute_result
array([[[ 0.5345935 , -1.0668839 , -0.62798595, ..., -0.78859204,
-0.5147211 , 2.1415784 ]],
[[ 0.24587776, -1.1436105 , -0.21513344, ..., -0.4950465 ,
-0.7586405 , 1.9262394 ]],
[[-0.01235329, -1.0903177 , -0.7307515 , ..., -1.5341333 ,
-0.9396954 , 1.8103021 ]],
...,
[[ 0.46007535, -0.9715014 , -0.52703196, ..., -0.29170716,
-0.74297565, 1.8094344 ]],
[[ 0.6113469 , -1.1828535 , -0.19491309, ..., -0.22889124,
-0.58382076, 2.185493 ]],
[[ 0.71980965, -0.4669612 , -0.49327967, ..., 0.0910981 ,
-0.65268064, 0.93472594]]], dtype=float32)
%% Cell type:code id: tags:
``` python
# KILL THE SGE WORKERS
# `dask_client` is still None when the cluster cell was not executed; in that
# case there is nothing to shut down and calling `.shutdown()` would raise.
if dask_client is not None:
    dask_client.shutdown()
```
%%%% Output: stream
tornado.application - ERROR - Exception in callback functools.partial(<function TCPServer._handle_connection.<locals>.<lambda> at 0x7f3470e7c8c0>, <Task finished coro=<BaseTCPListener._handle_stream() done, defined at /idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py:445> exception=ValueError('invalid operation on non-started TCPListener')>)
Traceback (most recent call last):
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
ret = callback()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/tornado/tcpserver.py", line 327, in <lambda>
gen.convert_yielded(future), lambda f: f.result()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 451, in _handle_stream
logger.debug("Incoming connection from %r to %r", address, self.contact_address)
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 486, in contact_address
host, port = self.get_host_port()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 467, in get_host_port
self._check_started()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 443, in _check_started
raise ValueError("invalid operation on non-started TCPListener")
ValueError: invalid operation on non-started TCPListener
tornado.application - ERROR - Exception in callback functools.partial(<function TCPServer._handle_connection.<locals>.<lambda> at 0x7f3470e7ce60>, <Task finished coro=<BaseTCPListener._handle_stream() done, defined at /idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py:445> exception=ValueError('invalid operation on non-started TCPListener')>)
Traceback (most recent call last):
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
ret = callback()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/tornado/tcpserver.py", line 327, in <lambda>
gen.convert_yielded(future), lambda f: f.result()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 451, in _handle_stream
logger.debug("Incoming connection from %r to %r", address, self.contact_address)
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 486, in contact_address
host, port = self.get_host_port()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 467, in get_host_port
self._check_started()
File "/idiap/user/tpereira/conda/envs/bob.nightlies/lib/python3.7/site-packages/distributed/comm/tcp.py", line 443, in _check_started
raise ValueError("invalid operation on non-started TCPListener")
ValueError: invalid operation on non-started TCPListener
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
concurrent.futures._base.CancelledError
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# Injecting extra samples in vanilla biometrics protocols
Sometimes our experiments go beyond "simple" database protocols.
Sometimes we just want to analyze the impact of some extra samples in our experiments without writing a whole dataset interface for that.
This notebook shows how to "inject" samples that don't belong to any protocol into an existing protocol.
We'll showcase how to inject samples to perform score normalization.
## Preparing the database
We'll showcase how to perform this injection using the MEDS dataset.
%% Cell type:code id: tags:
``` python
# No Dask client by default; the optional cluster cell may assign one.
dask_client = None

# Base directory used for checkpoints and score files -- change to your own.
OUTPUT_PATH = "YOUR_TEMP"

# Directory containing the images that will be injected.
PATH_INJECTED_DATA = "/idiap/temp/parzul/db_gen_output/database_neutralized/image/00000/"

##### CHANGE YOUR DATABASE HERE
from bob.bio.face.database import MEDSDatabase

database = MEDSDatabase(protocol="verification_fold1")

# Fetching the keys
# references = database.zprobes()[0].references
# The reference lists of the first "eval" probe and the first "dev" probe
# are concatenated.
references = (
    database.probes(group="eval")[0].references
    + database.probes(group="dev")[0].references
)
```
%% Cell type:markdown id: tags:
# Loading samples that will be injected
Here we'll inject samples for znorm and tnorm
%% Cell type:code id: tags:
``` python
# PATH
import os
import functools
import bob.io.base
# Fetching real data
#treferences = database.treferences()
#zprobes = database.zprobes()
# Eye coordinates attached as annotations to every injected sample.
eyes_annotations = {
    "leye": (61, 120),
    "reye": (61, 63),
}

# Image files (relative to the injected-data directory) used as T-norm references.
treferences_lst = [
    "0/0_ethnicity_0.png",
    "0/0_ethnicity_7.png",
]

# Image files (relative to the injected-data directory) used as Z-norm probes.
zprobes_lst = [
    "1/1_ethnicity_0.png",
    "1/1_ethnicity_7.png",
]
from bob.pipelines import Sample, DelayedSample, SampleSet
# Converting every element in a list in a sample set
def list_to_sampleset(lst, base_path, eyes_annotations, references):
    """Turn each relative file path in ``lst`` into a one-sample ``SampleSet``.

    Parameters
    ----------
    lst
        Iterable of file paths relative to ``base_path``.
    base_path
        Directory that is joined with every entry of ``lst``.
    eyes_annotations
        Annotations attached to every created sample.
    references
        Value stored as ``references`` on every created sample set.

    Returns
    -------
    list
        One ``SampleSet`` per entry of ``lst``, in the same order. The
        position of the entry (as a string) is used as ``reference_id`` and
        the relative path as ``key``, for both the sample and its set.
    """

    def _as_sample_set(index, relative_path):
        # The image is not read here: the loader is only bound to its path.
        loader = functools.partial(
            bob.io.base.load, os.path.join(base_path, relative_path)
        )
        identity = str(index)
        delayed = DelayedSample(
            loader,
            key=relative_path,
            reference_id=identity,
            annotations=eyes_annotations,
        )
        return SampleSet(
            samples=[delayed],
            key=relative_path,
            reference_id=identity,
            references=references,
        )

    return [
        _as_sample_set(index, relative_path)
        for index, relative_path in enumerate(lst)
    ]
# T-norm references carry no reference list.
treferences = list_to_sampleset(
    treferences_lst,
    base_path=PATH_INJECTED_DATA,
    eyes_annotations=eyes_annotations,
    references=None,
)

# Z-norm probes carry the reference list fetched from the database.
zprobes = list_to_sampleset(
    zprobes_lst,
    base_path=PATH_INJECTED_DATA,
    eyes_annotations=eyes_annotations,
    references=references,
)
```
%% Cell type:markdown id: tags:
## Preparing the pipeline
Here we are using the arcface from insight face (https://github.com/deepinsight/insightface).
Feel free to replace it with another extractor from `bob.bio.face.embeddings`.
%% Cell type:code id: tags:
``` python
import os
from bob.bio.base.pipelines.vanilla_biometrics import checkpoint_vanilla_biometrics
from bob.bio.base.pipelines.vanilla_biometrics import dask_vanilla_biometrics
from bob.bio.base.pipelines.vanilla_biometrics import ZTNormPipeline, ZTNormCheckpointWrapper
from bob.bio.base.pipelines.vanilla_biometrics import CSVScoreWriter
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
# Baseline pipeline built by the ArcFace (InsightFace, LResNet100) factory.
pipeline = arcface_insightFace_lresnet100(
    annotation_type=database.annotation_type,
    fixed_positions=None,
    memory_demanding=False,
)

## SCORE WRITER
# Here we want the pipeline to write using METADATA
pipeline.score_writer = CSVScoreWriter(os.path.join(OUTPUT_PATH, "./tmp"))

# Aggregating with checkpoint
pipeline = checkpoint_vanilla_biometrics(pipeline, OUTPUT_PATH)

# AGGREGATING WITH ZTNORM
pipeline = ZTNormPipeline(pipeline)
pipeline.ztnorm_solver = ZTNormCheckpointWrapper(
    pipeline.ztnorm_solver,
    os.path.join(OUTPUT_PATH, "normed-scores"),
)

# Dask wrapping is applied last, on top of the checkpointed ZT-norm pipeline.
pipeline = dask_vanilla_biometrics(pipeline, partition_size=200)

print(pipeline.transformer)
```
%%%% Output: stream
Pipeline(steps=[('ToDaskBag', ToDaskBag(partition_size=200)),
('samplewrapper-1',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=FaceCrop(annotator=BobIpMTCNN(),
cropped_image_size=(112,
112),
cropped_positions={'leye': (55,
81),
'reye': (55,
42)}),
fit_extra_arguments=(),
transform_extra_arguments=(('annotations',
'annotations'),)),
fe...
save_func=<function save at 0x7fccf501c560>))),
('samplewrapper-2',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=ArcFaceInsightFace_LResNet100(),
fit_extra_arguments=(),
transform_extra_arguments=()),
features_dir='/idiap/temp/tpereira/inject-example/samplewrapper-2',
load_func=<function load at 0x7fccf501c3b0>,
save_func=<function save at 0x7fccf501c560>)))])
%% Cell type:markdown id: tags:
## Setting the DASK client (optional step; do it if you want to use the grid)
**HERE MAKE ABSOLUTELY SURE THAT YOU DO `SETSHELL grid` BEFORE STARTING THE NOTEBOOK**
%% Cell type:code id: tags:
``` python
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
# Create the SGE cluster (min_jobs=1) and connect a Dask client to it.
cluster = SGEMultipleQueuesCluster(min_jobs=1)
# `compute_scores` checks whether `dask_client` is None to pick the scheduler.
dask_client = Client(cluster)
```
%% Cell type:markdown id: tags:
As an example, we consider 10 samples from this database and extract features for these samples:
%% Cell type:markdown id: tags:
## Running the vanilla Biometrics
%% Cell type:code id: tags:
``` python
import os
def post_process_scores(pipeline, scores, path):
    """Write ``scores`` with ``pipeline`` and post-process what was written.

    Parameters
    ----------
    pipeline
        Object providing ``write_scores(scores)`` and
        ``post_process(written_scores, path)``.
    scores
        Scores handed to ``pipeline.write_scores``.
    path
        Forwarded unchanged as the second argument of ``pipeline.post_process``.

    Returns
    -------
    Whatever ``pipeline.post_process`` returns.
    """
    return pipeline.post_process(pipeline.write_scores(scores), path)
def _build_filename(score_file_name, suffix):
    """Return ``suffix`` joined onto ``score_file_name`` as a path component."""
    joined = os.path.join(score_file_name, suffix)
    return joined
from dask.delayed import Delayed
import dask.bag
def compute_scores(result, dask_client):
    """Compute ``result`` if it is a lazy Dask object, otherwise return it as is.

    Parameters
    ----------
    result
        Either a ``dask.delayed.Delayed`` / ``dask.bag.Bag`` or any other value.
    dask_client
        Scheduler passed to ``result.compute``; when ``None`` a notice is
        printed and the ``"single-threaded"`` scheduler is used instead.

    Returns
    -------
    The computed value for lazy inputs, or ``result`` unchanged.
    """
    if not isinstance(result, (Delayed, dask.bag.Bag)):
        # Nothing lazy to evaluate.
        return result

    if dask_client is None:
        print("`dask_client` not set. Your pipeline will run locally")
        return result.compute(scheduler="single-threaded")

    return result.compute(scheduler=dask_client)
background_model_samples = database.background_model_samples()

# Run the ZT-norm pipeline once per group and write every score set it returns
# under "<OUTPUT_PATH>/scores-<group>/<suffix>".
for group in ["dev", "eval"]:
    score_file_name = os.path.join(OUTPUT_PATH, f"scores-{group}")
    biometric_references = database.references(group=group)
    probes = database.probes(group=group)

    (
        raw_scores,
        z_normed_scores,
        t_normed_scores,
        zt_normed_scores,
        s_normed_scores,
    ) = pipeline(
        background_model_samples,
        biometric_references,
        probes,
        zprobes,
        treferences,
        allow_scoring_with_all_biometric_references=True,
    )

    # (suffix, scores) pairs, listed in the order in which they are processed:
    # raw, Z, T, S and finally ZT.
    scores_by_suffix = [
        ("raw_scores", raw_scores),
        ("z_normed_scores", z_normed_scores),
        ("t_normed_scores", t_normed_scores),
        ("s_normed_scores", s_normed_scores),
        ("zt_normed_scores", zt_normed_scores),
    ]

    for suffix, scores in scores_by_suffix:
        # Each score set is post-processed and then computed right away,
        # before the next one is touched.
        post_processed = post_process_scores(
            pipeline, scores, _build_filename(score_file_name, suffix)
        )
        _ = compute_scores(post_processed, dask_client)
```
%% Cell type:markdown id: tags:
Once all scores have been computed, we shut down the Dask client to release the SGE workers.
%% Cell type:code id: tags:
``` python
# KILL THE SGE WORKERS
# Setting up the Dask client is optional in this notebook, so `dask_client`
# may still be None; calling `.shutdown()` on it would raise.
if dask_client is not None:
    dask_client.shutdown()
```
%% Cell type:code id: tags:
``` python
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment