Commit d9f5534b authored by Tiago de Freitas Pereira

Polishing ZT-Norm

parent a626b376
Part of 2 merge requests: !188 Score normalizations, !180 [dask] Preparing bob.bio.base for dask pipelines
@@ -10,7 +10,7 @@ from bob.pipelines import DelayedSample, Sample, SampleSet
import numpy as np
import dask
import functools
-import pickle
+import cloudpickle
import os
from .score_writers import FourColumnsScoreWriter
import logging
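The switch from pickle to cloudpickle matters because standard pickle serializes functions by reference and therefore cannot handle lambdas or locally defined closures, which DelayedSample objects typically carry as their loading callables. A minimal sketch of the difference (the loader below is a stand-in, not code from this commit):

import pickle
import cloudpickle

load = lambda: [1.0, 2.0, 3.0]  # stand-in for a DelayedSample's delayed loader

try:
    pickle.dumps(load)
except Exception as exc:  # pickle.PicklingError: <lambda> has no importable name
    print("pickle fails:", exc)

blob = cloudpickle.dumps(load)  # cloudpickle serializes the function by value
assert cloudpickle.loads(blob)() == [1.0, 2.0, 3.0]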
@@ -78,6 +78,7 @@ class ZTNormPipeline(object):
allow_scoring_with_all_biometric_references=False,
):
self.transformer = self.train_background_model(background_model_samples)
# Create biometric samples
@@ -128,11 +129,6 @@ class ZTNormPipeline(object):
allow_scoring_with_all_biometric_references,
)
# TODO: Do the score write
-        # if self.vanilla_biometrics_pipeline.score_writer is not None:
-        #     return self.write_scores(scores)
return raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores
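With score writing deferred (see the TODO above), the ZT-norm pipeline now hands all four score sets back to the caller. A hypothetical usage sketch; the call form and argument names are assumptions for illustration, not taken from the commit:

# Hypothetical caller of the ZT-norm pipeline shown above.
raw_scores, z_scores, t_scores, zt_scores = zt_pipeline(
    background_model_samples,
    biometric_reference_samples,
    probe_samples,
    zprobe_samples,
    t_biometric_reference_samples,
)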
def train_background_model(self, background_model_samples):
@@ -166,8 +162,6 @@ class ZTNormPipeline(object):
allow_scoring_with_all_biometric_references=False,
):
-        # zprobe_samples = self._inject_references(zprobe_samples, biometric_references)
z_scores, z_probe_features = self.compute_scores(
zprobe_samples, biometric_references
)
@@ -214,10 +208,6 @@ class ZTNormPipeline(object):
allow_scoring_with_all_biometric_references=False,
):
-        # z_probe_features = self._inject_references(
-        #     z_probe_features, t_biometric_references
-        # )
# Reusing the zprobe_features and t_biometric_references
zt_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.score_samples(
z_probe_features,
@@ -258,20 +248,6 @@ class ZTNorm(object):
# Axis 1=TNORM
return (score - mu) / std
"""
if axis == 1:
return (
score
- np.tile(mu.reshape(N, 1), (1, score.shape[1]))
) / np.tile(std.reshape(N, 1), (1, score.shape[1]))
else:
return (
score
- np.tile(mu.reshape(1, N), (score.shape[0], 1))
) / np.tile(std.reshape(1, N), (score.shape[0], 1))
"""
def _compute_std(self, mu, norm_base_scores, axis=1):
# Reference: https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/master/bob/learn/em/test/test_ztnorm.py
# Axis 0=ZNORM
@@ -317,16 +293,10 @@ class ZTNorm(object):
axis=0 computes CORRECTLY the statistics for ZNorm
axis=1 computes CORRECTLY the statistics for TNorm
"""
# Dumping all scores
-        if isinstance(sampleset_for_norm[0][0], DelayedSample):
-            score_floats = np.array(
-                [f.data for sset in sampleset_for_norm for s in sset for f in s.data]
-            )
-        else:
-            score_floats = np.array(
-                [s.data for sset in sampleset_for_norm for s in sset]
-            )
+        score_floats = np.array(
+            [s.data for sset in sampleset_for_norm for s in sset]
+        )
# Reshaping in PROBE vs BIOMETRIC_REFERENCES
n_probes = len(sampleset_for_norm)
@@ -334,23 +304,18 @@ class ZTNorm(object):
score_floats = score_floats.reshape((n_probes, n_references))
# AXIS ON THE MODELS
-        big_mu = np.mean(score_floats, axis=axis)
-        # big_std = np.std(score_floats, axis=axis)
+        big_mu = np.mean(score_floats, axis=axis)
+        big_std = self._compute_std(big_mu, score_floats, axis=axis)
# Creating statistics structure with subject id as the key
stats = {}
if axis == 0:
-            # TODO: NEED TO SOLVE THIS FETCHING.
-            # IT SHOULD BE TRANSPARENT
-            if isinstance(sampleset_for_norm[0][0], DelayedSample):
-                sset = sampleset_for_norm[0].samples[0].data
-            else:
-                sset = sampleset_for_norm[0]
-            for mu, std, s in zip(big_mu, big_std, sset):
-                stats[s.subject] = {"big_mu": mu, "big_std": std}
+            # Z-Norm is one statistic per biometric reference
+            biometric_reference_subjects = [br.subject for br in sampleset_for_norm[0]]
+            for mu, std, s in zip(big_mu, big_std, biometric_reference_subjects):
+                stats[s] = {"big_mu": mu, "big_std": std}
else:
# T-Norm is one statistic per probe
for mu, std, sset in zip(big_mu, big_std, sampleset_for_norm):
stats[sset.subject] = {"big_mu": mu, "big_std": std}
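To make the axis convention concrete: with scores laid out as PROBES x BIOMETRIC_REFERENCES, axis=0 pools over probes and yields one (mu, std) pair per reference (Z-Norm), while axis=1 pools over references and yields one pair per probe (T-Norm). A toy sketch of the statistics structure built above (subject ids are invented):

import numpy as np

score_floats = np.array([[1.0, 2.0],   # probes x biometric references
                         [3.0, 4.0],
                         [5.0, 6.0]])

# axis=0 (Z-Norm): statistics per biometric reference (per column)
big_mu = score_floats.mean(axis=0)
big_std = score_floats.std(axis=0)
stats = {
    subject: {"big_mu": mu, "big_std": std}
    for subject, mu, std in zip(["ref_a", "ref_b"], big_mu, big_std)
}
# axis=1 (T-Norm) would instead key the dict by probe subject (per row).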
@@ -359,7 +324,6 @@ class ZTNorm(object):
def _znorm_samplesets(self, probe_scores, stats):
# Normalizing
-        # TODO: THIS TENDS TO BE EXTREMLY SLOW
z_normed_score_samples = []
for probe_score in probe_scores:
z_normed_score_samples.append(self._apply_znorm(probe_score, stats))
@@ -497,10 +461,10 @@ class ZTNormCheckpointWrapper(object):
def _write_scores(self, samples, path):
os.makedirs(os.path.dirname(path), exist_ok=True)
open(path, "wb").write(pickle.dumps(samples))
open(path, "wb").write(cloudpickle.dumps(samples))
def _load(self, path):
-        return pickle.loads(open(path, "rb").read())
+        return cloudpickle.loads(open(path, "rb").read())
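These checkpoint helpers round-trip samples through cloudpickle, though the one-line open(...).write(...) form leaves the file handle for the garbage collector to close. A sketch of the same round-trip with explicit context managers:

import os
import cloudpickle

def _write_scores(samples, path):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "wb") as f:
        f.write(cloudpickle.dumps(samples))

def _load(path):
    with open(path, "rb") as f:
        return cloudpickle.loads(f.read())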
def _apply_znorm(self, probe_score, stats):
@@ -157,7 +157,7 @@ def vanilla_biometrics(
"""
def _compute_scores(result, dask_client):
-        if isinstance(result, Delayed):
+        if isinstance(result, Delayed) or isinstance(result, dask.bag.Bag):
if dask_client is not None:
result = result.compute(scheduler=dask_client)
else:
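The widened isinstance check is needed because dask graphs arrive here in two flavors, Delayed objects and Bags. Both expose .compute(), and both accept the scheduler argument that the branch above uses to route work to a dask client. A small sketch:

import dask.bag
from dask.delayed import delayed

bag = dask.bag.from_sequence([1, 2, 3]).map(lambda x: x * 2)
lazy = delayed(sum)([1, 2, 3])

for result in (bag, lazy):
    # scheduler=dask_client would route this to a dask.distributed cluster
    print(result.compute())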
@@ -254,11 +254,12 @@ def vanilla_biometrics(
def _build_filename(score_file_name, suffix):
return os.path.join(score_file_name, suffix)
# Running RAW_SCORES
raw_scores = _post_process_scores(
pipeline, raw_scores, _build_filename(score_file_name, "raw_scores")
)
-            _compute_scores(raw_scores, dask_client)
+            _ = _compute_scores(raw_scores, dask_client)
# Z-SCORES
z_normed_scores = _post_process_scores(
@@ -266,7 +267,7 @@ def vanilla_biometrics(
z_normed_scores,
_build_filename(score_file_name, "z_normed_scores"),
)
-            _compute_scores(z_normed_scores, dask_client)
+            _ = _compute_scores(z_normed_scores, dask_client)
# T-SCORES
t_normed_scores = _post_process_scores(
@@ -274,7 +275,7 @@ def vanilla_biometrics(
t_normed_scores,
_build_filename(score_file_name, "t_normed_scores"),
)
-            _compute_scores(t_normed_scores, dask_client)
+            _ = _compute_scores(t_normed_scores, dask_client)
# ZT-SCORES
zt_normed_scores = _post_process_scores(
@@ -282,7 +283,7 @@ def vanilla_biometrics(
zt_normed_scores,
_build_filename(score_file_name, "zt_normed_scores"),
)
-            _compute_scores(zt_normed_scores, dask_client)
+            _ = _compute_scores(zt_normed_scores, dask_client)
else:
@@ -296,7 +297,7 @@ def vanilla_biometrics(
post_processed_scores = _post_process_scores(
pipeline, result, score_file_name
)
-            _compute_scores(post_processed_scores, dask_client)
+            _ = _compute_scores(post_processed_scores, dask_client)
if dask_client is not None:
dask_client.shutdown()
@@ -41,9 +41,9 @@ import copy
def zt_norm_stubs(references, probes, t_references, z_probes):
def _norm(scores, norm_base_scores, axis=1):
mu = np.mean(norm_base_scores, axis=axis)
-        #old = True
-        #if old:
+        # old = True
+        # if old:
# std = np.std(norm_base_scores, axis=axis)
# if axis == 1:
# return ((scores.T - mu) / std).T
@@ -96,7 +96,6 @@ def zt_norm_stubs(references, probes, t_references, z_probes):
)
) / np.tile(std.reshape(1, norm_base_scores.shape[1]), (scores.shape[0], 1))
n_reference = references.shape[0]
n_probes = probes.shape[0]
n_t_references = t_references.shape[0]
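For orientation, the stubs compose the two normalizations: Z-Norm uses impostor Z-probes to derive per-reference statistics, T-Norm uses cohort T-models to derive per-probe statistics, and ZT-Norm applies Z then T (the cohort scores are Z-normed first). A compact sketch under the test's REFERENCES x PROBES layout (shapes invented):

import numpy as np

def _norm(scores, base, axis):
    mu = base.mean(axis=axis, keepdims=True)
    std = base.std(axis=axis, keepdims=True)
    return (scores - mu) / std

rng = np.random.default_rng(0)
raw = rng.random((4, 5))        # references x probes
z_base = rng.random((4, 3))     # references x z-probes
t_raw = rng.random((2, 5))      # t-references x probes
zt_base = rng.random((2, 3))    # t-references x z-probes

z = _norm(raw, z_base, axis=1)            # Z-Norm: per-reference statistics
t_cohort = _norm(t_raw, zt_base, axis=1)  # Z-Norm the cohort scores too
zt = _norm(z, t_cohort, axis=0)           # T-Norm: per-probe statistics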
@@ -159,14 +158,7 @@ def test_norm_mechanics():
def _dump_scores_from_samples(scores, shape):
# We have to transpose because the tests are BIOMETRIC_REFERENCES vs PROBES
# and bob.bio.base is PROBES vs BIOMETRIC_REFERENCES
-        if isinstance(scores[0][0], DelayedSample):
-            return (
-                np.array([f.data for sset in scores for s in sset for f in s.data])
-                .reshape(shape)
-                .T
-            )
-        else:
-            return np.array([s.data for sset in scores for s in sset]).reshape(shape).T
+        return np.array([s.data for sset in scores for s in sset]).reshape(shape).T
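The helper collapses every SampleSet into a flat score vector and then transposes, because scores are produced PROBES x BIOMETRIC_REFERENCES while the test fixtures are written REFERENCES x PROBES. A toy check (values invented):

import numpy as np

flat = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])  # 2 probes x 3 references, flattened
probes_vs_refs = flat.reshape((2, 3))
refs_vs_probes = probes_vs_refs.T  # the layout the test fixtures expect
assert refs_vs_probes.shape == (3, 2)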
with tempfile.TemporaryDirectory() as dir_name:
@@ -394,8 +386,8 @@ def test_norm_mechanics():
# With checkpoint
run(False, with_checkpoint=True)
run(False, with_checkpoint=True)
-        # shutil.rmtree(dir_name)  # Deleting the cache so it runs again from scratch
-        # os.makedirs(dir_name, exist_ok=True)
+        shutil.rmtree(dir_name)  # Deleting the cache so it runs again from scratch
+        os.makedirs(dir_name, exist_ok=True)
# With dask
run(True) # On memory