Commit e9d32305 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Inject samples example

parent 7488d20e
Pipeline #51281 failed with stage
in 37 minutes and 13 seconds
%% Cell type:markdown id: tags:
# Injecting extra samples in vanilla biometrics protocols
Sometimes our experiments go beyond "simple" database protocols.
Sometimes we just want to analyze the impact of some extra samples in our experiments without writing a whole dataset interface for that.
This notebook shows how to "inject" samples that don't belong to any protocol into an existing protocol.
We'll showcase how to inject samples to perform score normalization.
## Preparing the database
We'll showcase how to perform this injection using the MEDS dataset.
%% Cell type:code id: tags:
``` python
# No Dask client by default; `compute_scores` below then runs the pipeline locally.
dask_client = None
# Base directory for the checkpoints and score files written by this notebook
# (fill it in before running).
OUTPUT_PATH = ""
# Base directory that the injected image paths are joined onto
# (fill it in before running).
PATH_INJECTED_DATA = ""
##### CHANGE YOUR DATABASE HERE
from bob.bio.face.database import MEDSDatabase
database = MEDSDatabase(protocol="verification_fold1")
# Fetching the keys
# Alternative kept for reference: take the references from the database's own z-probes.
#references = database.zprobes()[0].references
# References listed by the first "eval" probe followed by those of the first "dev" probe.
# NOTE(review): assumes `.references` is a list so that `+` concatenates -- confirm.
references = database.probes(group="eval")[0].references + database.probes(group="dev")[0].references
```
%% Cell type:markdown id: tags:
# Loading samples that will be injected
Here we'll inject samples for znorm and tnorm
%% Cell type:code id: tags:
``` python
# PATH
import os
import functools
import bob.io.base
# The cohort could instead be fetched from the database itself:
# treferences = database.treferences()
# zprobes = database.zprobes()

# Eye annotations attached to every injected sample.
eyes_annotations = dict(leye=(61, 120), reye=(61, 63))

# Image files, relative to the injected-data base path, used as T-norm references.
treferences_lst = [
    "0/0_ethnicity_0.png",
    "0/0_ethnicity_7.png",
]

# Image files, relative to the injected-data base path, used as Z-norm probes.
zprobes_lst = [
    "1/1_ethnicity_0.png",
    "1/1_ethnicity_7.png",
]
from bob.pipelines import Sample, DelayedSample, SampleSet
# Converting every element in a list in a sample set
def list_to_sampleset(lst, base_path, eyes_annotations, references):
    """Wrap every file of ``lst`` in its own single-sample ``SampleSet``.

    Parameters
    ----------
    lst
        Iterable of file paths, relative to ``base_path``.
    base_path
        Directory that each entry of ``lst`` is joined onto.
    eyes_annotations
        Annotations attached, unchanged, to every created sample.
    references
        Value forwarded as ``references`` to every created ``SampleSet``
        (``None`` is passed through as-is).

    Returns
    -------
    list
        One ``SampleSet`` per entry of ``lst``, in input order. Both the set
        and its sample use the relative path as ``key`` and the entry's
        position in ``lst`` (as a string) as ``reference_id``.
    """
    sample_sets = []
    for index, relative_path in enumerate(lst):
        reference_id = str(index)
        # Loading is deferred: the partial is only handed to DelayedSample here.
        loader = functools.partial(
            bob.io.base.load, os.path.join(base_path, relative_path)
        )
        sample = DelayedSample(
            loader,
            key=relative_path,
            reference_id=reference_id,
            annotations=eyes_annotations,
        )
        sample_sets.append(
            SampleSet(
                samples=[sample],
                key=relative_path,
                reference_id=reference_id,
                references=references,
            )
        )
    return sample_sets
# T-norm cohort: injected references, with no reference list attached.
treferences = list_to_sampleset(
    lst=treferences_lst,
    base_path=PATH_INJECTED_DATA,
    eyes_annotations=eyes_annotations,
    references=None,
)
# Z-norm cohort: injected probes, carrying the references collected from the database.
zprobes = list_to_sampleset(
    lst=zprobes_lst,
    base_path=PATH_INJECTED_DATA,
    eyes_annotations=eyes_annotations,
    references=references,
)
```
%% Cell type:markdown id: tags:
## Preparing the pipeline
Here we are using the ArcFace model from InsightFace (https://github.com/deepinsight/insightface).
Feel free to swap it for another model; see `bob.bio.face.embeddings` for the available ones.
%% Cell type:code id: tags:
``` python
import os
from bob.bio.base.pipelines.vanilla_biometrics import checkpoint_vanilla_biometrics
from bob.bio.base.pipelines.vanilla_biometrics import dask_vanilla_biometrics
from bob.bio.base.pipelines.vanilla_biometrics import ZTNormPipeline, ZTNormCheckpointWrapper
from bob.bio.base.pipelines.vanilla_biometrics import CSVScoreWriter
from bob.bio.face.embeddings.mxnet import arcface_insightFace_lresnet100
# Build the ArcFace pipeline using the annotation type declared by the database.
pipeline = arcface_insightFace_lresnet100(annotation_type=database.annotation_type,
fixed_positions=None,
memory_demanding=False)
## SCORE WRITER
# Here we want the pipeline to write using METADATA
# The CSV score writer is given the path OUTPUT_PATH joined with "./tmp".
pipeline.score_writer = CSVScoreWriter(os.path.join(OUTPUT_PATH, "./tmp"))
# Aggregating with checkpoint (OUTPUT_PATH is passed as the checkpoint location)
pipeline = checkpoint_vanilla_biometrics(pipeline, OUTPUT_PATH)
# Alternative kept for reference: the same wrapping written as one expression.
#pipeline = dask_vanilla_biometrics(ZTNormCheckpointWrapper(ZTNormPipeline(pipeline), OUTPUT_PATH))
# AGGREGATING WITH ZTNORM
pipeline = ZTNormPipeline(pipeline)
# Wrap the existing ZT-norm solver with a checkpoint wrapper pointed at
# OUTPUT_PATH/normed-scores.
pipeline.ztnorm_solver = ZTNormCheckpointWrapper(
pipeline.ztnorm_solver, os.path.join(OUTPUT_PATH, "normed-scores")
)
# Dask wrapping comes last, after the checkpoint and ZT-norm wrappers.
pipeline = dask_vanilla_biometrics(pipeline, partition_size=200)
# Print the resulting transformer so its steps can be inspected.
print(pipeline.transformer)
```
%%%% Output: stream
Pipeline(steps=[('ToDaskBag', ToDaskBag(partition_size=200)),
('samplewrapper-1',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=FaceCrop(annotator=BobIpMTCNN(),
cropped_image_size=(112,
112),
cropped_positions={'leye': (55,
81),
'reye': (55,
42)}),
fit_extra_arguments=(),
transform_extra_arguments=(('annotations',
'annotations'),)),
fe...
save_func=<function save at 0x7fccf501c560>))),
('samplewrapper-2',
DaskWrapper(estimator=CheckpointWrapper(estimator=SampleWrapper(estimator=ArcFaceInsightFace_LResNet100(),
fit_extra_arguments=(),
transform_extra_arguments=()),
features_dir='/idiap/temp/tpereira/inject-example/samplewrapper-2',
load_func=<function load at 0x7fccf501c3b0>,
save_func=<function save at 0x7fccf501c560>)))])
%% Cell type:markdown id: tags:
## Setting the DASK client (optional step; do it if you want to use the grid)
**HERE MAKE ABSOLUTELY SURE THAT YOU DO `SETSHELL grid` BEFORE STARTING THE NOTEBOOK**
%% Cell type:code id: tags:
``` python
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
# Create the SGE cluster object.
# NOTE(review): `min_jobs=1` presumably keeps at least one worker job alive -- confirm.
cluster = SGEMultipleQueuesCluster(min_jobs=1)
# Replace the default `dask_client = None`; `compute_scores` passes this client
# as the scheduler when computing results.
dask_client = Client(cluster)
```
%% Cell type:markdown id: tags:
As an example, we consider 10 samples from this database and extract features for these samples:
%% Cell type:markdown id: tags:
## Running the vanilla Biometrics
%% Cell type:code id: tags:
``` python
import os
def post_process_scores(pipeline, scores, path):
    """Write ``scores`` through ``pipeline`` and post-process them into ``path``.

    Parameters
    ----------
    pipeline
        Object exposing ``write_scores(scores)`` and
        ``post_process(written_scores, path)``.
    scores
        Scores handed to ``pipeline.write_scores``.
    path
        Destination forwarded to ``pipeline.post_process``.

    Returns
    -------
    Whatever ``pipeline.post_process`` returns.
    """
    written_scores = pipeline.write_scores(scores)
    return pipeline.post_process(written_scores, path)
def _build_filename(score_file_name, suffix):
    """Return ``suffix`` joined onto ``score_file_name`` as a path component."""
    return os.path.join(score_file_name, suffix)
from dask.delayed import Delayed
import dask.bag
def compute_scores(result, dask_client):
    """Compute ``result`` if it is a Dask ``Delayed`` or ``Bag``.

    Parameters
    ----------
    result
        Either a Dask ``Delayed`` / ``dask.bag.Bag`` or an already-computed value.
    dask_client
        Scheduler passed to ``compute``; when ``None`` the computation runs
        with the ``"single-threaded"`` scheduler.

    Returns
    -------
    The computed value, or ``result`` itself when it is neither a ``Delayed``
    nor a ``Bag``.
    """
    if not isinstance(result, (Delayed, dask.bag.Bag)):
        # Nothing lazy to evaluate.
        return result

    if dask_client is not None:
        return result.compute(scheduler=dask_client)

    print("`dask_client` not set. Your pipeline will run locally")
    return result.compute(scheduler="single-threaded")
# The background-model samples are fetched once and reused for both groups.
background_model_samples = database.background_model_samples()

for group in ["dev", "eval"]:
    # Every score file of this group goes under OUTPUT_PATH/scores-<group>.
    score_file_name = os.path.join(OUTPUT_PATH, f"scores-{group}")
    biometric_references = database.references(group=group)
    probes = database.probes(group=group)

    # The injected `zprobes` and `treferences` are passed alongside the
    # protocol's own references and probes.
    (
        raw_scores,
        z_normed_scores,
        t_normed_scores,
        zt_normed_scores,
        s_normed_scores,
    ) = pipeline(
        background_model_samples,
        biometric_references,
        probes,
        zprobes,
        treferences,
        allow_scoring_with_all_biometric_references=True,
    )

    # (file suffix, scores) pairs in processing order: raw, Z, T, S, then ZT.
    scores_by_suffix = [
        ("raw_scores", raw_scores),
        ("z_normed_scores", z_normed_scores),
        ("t_normed_scores", t_normed_scores),
        ("s_normed_scores", s_normed_scores),
        ("zt_normed_scores", zt_normed_scores),
    ]
    for suffix, scores in scores_by_suffix:
        post_processed = post_process_scores(
            pipeline, scores, _build_filename(score_file_name, suffix)
        )
        # Only the side effects of the computation are needed here.
        _ = compute_scores(post_processed, dask_client)
```
%% Cell type:markdown id: tags:
In the following cells, we convert the extracted features to `numpy.array` and check the size of features.
%% Cell type:code id: tags:
``` python
# KILL THE SGE WORKERS
# Creating the Dask client is optional and `dask_client` defaults to None,
# so only shut it down when one was actually created.
if dask_client is not None:
    dask_client.shutdown()
```
%% Cell type:code id: tags:
``` python
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment