Skip to content
Snippets Groups Projects
Commit d458eba4 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Changed the scoring function to dump one score per sampleset.

parent c95c92d6
No related branches found
No related tags found
2 merge requests!188Score normalizations,!180[dask] Preparing bob.bio.base for dask pipelines
from .pipelines import VanillaBiometricsPipeline
from .pipelines import VanillaBiometricsPipeline, ZNormVanillaBiometricsPipeline
from .biometric_algorithms import Distance
from .score_writers import FourColumnsScoreWriter, CSVScoreWriter
from .wrappers import BioAlgorithmCheckpointWrapper, BioAlgorithmDaskWrapper, dask_vanilla_biometrics
......
......@@ -5,6 +5,15 @@
from abc import ABCMeta, abstractmethod
from bob.pipelines.sample import Sample, SampleSet, DelayedSample
import functools
import numpy as np
def average_scores(scores):
    """
    Average the scores obtained from multiple probe samples.

    Parameters
    ----------

    scores: array-like
        Scores stacked along the first axis, i.e. one entry (row) per
        probe sample.

    Returns
    -------

    The mean of ``scores`` taken along axis 0, as computed by
    :any:`numpy.mean`.
    """
    return np.mean(scores, axis=0)
class BioAlgorithm(metaclass=ABCMeta):
......@@ -13,10 +22,17 @@ class BioAlgorithm(metaclass=ABCMeta):
biometric model enrollment, via ``enroll()`` and scoring, with
``score()``.
Parameters
----------
score_reduction_operation: callable
Callable that reduces the scores computed for the individual samples of a sampleset (e.g., by averaging them)
"""
def __init__(self, score_reduction_operation=average_scores, **kwargs):
    """
    Parameters
    ----------

    score_reduction_operation: callable
        Function applied to the scores of the samples of a probe
        sampleset to reduce them. Defaults to ``average_scores``.

    kwargs:
        Accepted for signature compatibility; not used here.
    """
    # Cache of the reference data; it stays ``None`` until the scoring code
    # fills it on first use.
    self.stacked_biometric_references = None
    # Store the callable the caller supplied. Previously the default
    # ``average_scores`` was always assigned and the argument was ignored.
    self.score_reduction_operation = score_reduction_operation
def enroll_samples(self, biometric_references):
"""This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`.
......@@ -113,53 +129,63 @@ class BioAlgorithm(metaclass=ABCMeta):
biometric_references,
allow_scoring_with_all_biometric_references,
):
"""Given a sampleset for probing, compute the scores and returns a sample set with the scores
"""Given one sampleset for probing, compute the scores and returns a sample set with the scores
"""
# Stacking the samples from a sampleset
data = [s.data for s in sampleset.samples]
# Compute scores for each sample inside of the sample set
# TODO: In some cases we want to compute 1 score per sampleset (IJB-C)
# We should add an aggregator function here so we can properly aggregator samples from
# a sampleset either after or before scoring.
# To be honest, this should be the default behavior
retval = []
for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)):
# Creating one sample per comparison
subprobe_scores = []
scores_biometric_references = []
if allow_scoring_with_all_biometric_references:
# Optimized scoring
# This is useful when you scoring function can be compared with a
# static batch of biometric references
total_scores = []
for probe_sample in sampleset:
if allow_scoring_with_all_biometric_references:
# Multiple scoring
if self.stacked_biometric_references is None:
self.stacked_biometric_references = [
ref.data for ref in biometric_references
]
scores = self.score_multiple_biometric_references(
self.stacked_biometric_references, s
self.stacked_biometric_references, probe_sample.data
)
# Wrapping the scores in samples
for ref, score in zip(biometric_references, scores):
subprobe_scores.append(Sample(score, parent=ref))
else:
total_scores.append(scores)
# Reducing them
total_scores = self.score_reduction_operation(total_scores)
# Wrapping the scores in samples
for ref, score in zip(biometric_references, total_scores):
scores_biometric_references.append(Sample(score, parent=ref))
else:
# Non optimizing scoring
# There are some protocols where each probe has
# to be scored with a specific list of biometric_references
total_scores = []
for probe_sample in sampleset:
scores = []
for ref in [
r for r in biometric_references if r.key in sampleset.references
r for r in biometric_references if str(r.subject) in sampleset.references
]:
score = self.score(ref.data, s)
subprobe_scores.append(Sample(score, parent=ref))
# Fetching metadata from the probe
kwargs = dict(
(metadata, sampleset.__dict__[metadata])
for metadata in sampleset.__dict__.keys()
if metadata not in ["samples", "key", "data", "load", "_data"]
)
subprobe = SampleSet(subprobe_scores, parent=parent, **kwargs)
retval.append(subprobe)
scores.append(self.score(ref.data, probe_sample.data))
total_scores.append(scores)
total_scores = self.score_reduction_operation(np.array(total_scores))
for ref, score in zip([
r for r in biometric_references if str(r.subject) in sampleset.references
], total_scores):
scores_biometric_references.append(Sample(score, parent=ref))
# Fetching metadata from the probe
kwargs = dict(
(metadata, sampleset.__dict__[metadata])
for metadata in sampleset.__dict__.keys()
if metadata not in ["samples", "key", "data", "load", "_data"]
)
return SampleSet(scores_biometric_references, parent=sampleset, **kwargs)
return retval
@abstractmethod
def score(self, biometric_reference, data):
......
......@@ -120,7 +120,7 @@ class VanillaBiometricsPipeline(object):
biometric_reference_features = self.transformer.transform(
biometric_reference_samples
)
biometric_references = self.biometric_algorithm.enroll_samples(
biometric_reference_features
)
......@@ -137,7 +137,7 @@ class VanillaBiometricsPipeline(object):
# probes is a list of SampleSets
probe_features = self.transformer.transform(probe_samples)
scores = self.biometric_algorithm.score_samples(
probe_features,
biometric_references,
......@@ -146,3 +146,69 @@ class VanillaBiometricsPipeline(object):
# scores is a list of Samples
return scores
class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline):
    """
    Vanilla biometrics pipeline followed by a Z-normalization step.

    This class wraps an existing pipeline object and delegates background
    model training, enrollment and scoring to it.

    .. warning::

       The normalization itself is not implemented yet:
       ``compute_znorm_scores`` scores the z-probes and returns ``None``.

    Parameters
    ----------

    vanilla_biometrics_pipeline:
        The pipeline object every step is delegated to.
    """

    def __init__(self, vanilla_biometrics_pipeline):
        self.vanilla_biometrics_pipeline = vanilla_biometrics_pipeline

    def __call__(
        self,
        background_model_samples,
        biometric_reference_samples,
        probe_samples,
        zprobe_samples,
        allow_scoring_with_all_biometric_references=False,
    ):
        """
        Run the wrapped pipeline and hand its scores to
        ``compute_znorm_scores`` together with the z-probes.

        Returns
        -------

        Whatever ``compute_znorm_scores`` returns (currently ``None``).
        """
        self.transformer = self.train_background_model(background_model_samples)

        # Create biometric samples
        biometric_references = self.create_biometric_reference(
            biometric_reference_samples
        )

        # NOTE(review): this runs the wrapped pipeline on the same inputs
        # again, on top of the explicit training/enrollment calls above.
        # Confirm whether the duplicated work is intended.
        raw_scores = self.vanilla_biometrics_pipeline(
            background_model_samples,
            biometric_reference_samples,
            probe_samples,
            allow_scoring_with_all_biometric_references,
        )

        return self.compute_znorm_scores(
            zprobe_samples, raw_scores, biometric_references
        )

    def train_background_model(self, background_model_samples):
        """Delegate background model training to the wrapped pipeline."""
        return self.vanilla_biometrics_pipeline.train_background_model(
            background_model_samples
        )

    def create_biometric_reference(self, biometric_reference_samples):
        """Delegate enrollment to the wrapped pipeline."""
        return self.vanilla_biometrics_pipeline.create_biometric_reference(
            biometric_reference_samples
        )

    def compute_scores(
        self,
        probe_samples,
        biometric_references,
        allow_scoring_with_all_biometric_references=False,
    ):
        """Delegate probe scoring to the wrapped pipeline."""
        return self.vanilla_biometrics_pipeline.compute_scores(
            probe_samples,
            biometric_references,
            allow_scoring_with_all_biometric_references,
        )

    def compute_znorm_scores(self, zprobe_samples, probe_scores, biometric_references):
        """
        Score the z-probes against the biometric references.

        The Z-normalization of ``probe_scores`` is not implemented yet, so
        this method returns ``None``.
        """
        # The leftover ``ipdb`` breakpoint was removed: it halted every run
        # and made the module depend on a debugger package.
        z_scores = self.vanilla_biometrics_pipeline.compute_scores(
            zprobe_samples, biometric_references
        )

        # TODO: normalize ``probe_scores`` with statistics derived from
        # ``z_scores`` and return the normalized scores.
        return None
......@@ -20,33 +20,30 @@ class FourColumnsScoreWriter(ScoreWriter):
Write scores and returns a :any:`bob.pipelines.DelayedSample` containing
the instruction to open the score file
"""
os.makedirs(path, exist_ok=True)
checkpointed_scores = []
for probe in probe_sampleset:
lines = [
"{0} {1} {2} {3}\n".format(
biometric_reference.subject,
probe.subject,
probe.key,
biometric_reference.data,
)
for biometric_reference in probe
]
filename = os.path.join(path, str(probe.subject)) + ".txt"
open(filename, "w").writelines(lines)
checkpointed_scores.append(
SampleSet(
[
DelayedSample(
functools.partial(self.read, filename), parent=probe
)
],
parent=probe,
)
lines = [
"{0} {1} {2} {3}\n".format(
biometric_reference.subject,
probe_sampleset.subject,
probe_sampleset.key,
biometric_reference.data,
)
return checkpointed_scores
for biometric_reference in probe_sampleset
]
filename = os.path.join(path, str(probe_sampleset.subject)) + ".txt"
open(filename, "w").writelines(lines)
return SampleSet(
[
DelayedSample(
functools.partial(self.read, filename), parent=probe_sampleset
)
],
parent=probe_sampleset,
)
def read(self, path):
    """
    Read a score file and return its content as a list of lines
    (line endings are kept).
    """
    # The context manager closes the file handle deterministically instead
    # of leaving that to the garbage collector.
    with open(path) as f:
        return f.readlines()
def concatenate_write_scores(self, samplesets, filename):
    """
    Given a list of samplesets, write them all to a single file.

    Parameters
    ----------

    samplesets:
        Iterable of samplesets; every element of a sampleset must expose
        the score lines to write in its ``data`` attribute.

    filename:
        Path of the output file. Missing parent directories are created.
    """
    # ``os.makedirs("")`` raises, so only create the parent directory when
    # ``filename`` actually has one.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # The context manager flushes and closes the output file even if
    # reading one of the score samples fails.
    with open(filename, "w") as f:
        for sset in samplesets:
            for scores in sset:
                f.writelines(scores.data)
class CSVScoreWriter(ScoreWriter):
......@@ -115,40 +111,36 @@ class CSVScoreWriter(ScoreWriter):
os.makedirs(path, exist_ok=True)
checkpointed_scores = []
header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0])
header, probe_dict, bioref_dict = create_csv_header(probe_sampleset)
for probe in probe_sampleset:
filename = os.path.join(path, str(probe.subject)) + ".csv"
with open(filename, "w") as f:
filename = os.path.join(path, str(probe_sampleset.subject)) + ".csv"
with open(filename, "w") as f:
csv_write = csv.writer(f)
csv_write.writerow(header)
csv_write = csv.writer(f)
csv_write.writerow(header)
rows = []
probe_row = [str(probe.key)] + [
str(probe.__dict__[k]) for k in probe_dict.keys()
rows = []
probe_row = [str(probe_sampleset.key)] + [
str(probe_sampleset.__dict__[k]) for k in probe_dict.keys()
]
for biometric_reference in probe_sampleset:
bio_ref_row = [
str(biometric_reference.__dict__[k])
for k in list(bioref_dict.keys()) + ["data"]
]
for biometric_reference in probe:
bio_ref_row = [
str(biometric_reference.__dict__[k])
for k in list(bioref_dict.keys()) + ["data"]
]
rows.append(probe_row + bio_ref_row)
csv_write.writerows(rows)
checkpointed_scores.append(
SampleSet(
[
DelayedSample(
functools.partial(self.read, filename), parent=probe
)
],
parent=probe,
rows.append(probe_row + bio_ref_row)
csv_write.writerows(rows)
return SampleSet(
[
DelayedSample(
functools.partial(self.read, filename), parent=probe_sampleset
)
)
return checkpointed_scores
],
parent=probe_sampleset,
)
def read(self, path):
    """
    Read a CSV score file and return its content as a list of raw lines
    (line endings are kept; the lines are not parsed).
    """
    # The context manager closes the file handle deterministically instead
    # of leaving that to the garbage collector.
    with open(path) as f:
        return f.readlines()
def concatenate_write_scores(self, samplesets_list, filename):
def concatenate_write_scores(self, samplesets, filename):
"""
Given a list of samplesets, write them all to a single file
"""
......@@ -167,8 +159,8 @@ class CSVScoreWriter(ScoreWriter):
base_dir = os.path.splitext(filename)[0]
os.makedirs(base_dir, exist_ok=True)
f = None
for i, samplesets in enumerate(samplesets_list):
if i% self.n_sample_sets==0:
for i, sset in enumerate(samplesets):
if i % self.n_sample_sets == 0:
if f is not None:
f.close()
del f
......@@ -176,10 +168,9 @@ class CSVScoreWriter(ScoreWriter):
filename = os.path.join(base_dir, f"chunk_{i}.csv")
f = open(filename, "w")
for sset in samplesets:
for s in sset:
if i==0:
f.writelines(s.data)
else:
f.writelines(s.data[1:])
samplesets_list[i] = None
\ No newline at end of file
for scores in sset:
if i == 0:
f.writelines(scores.data)
else:
f.writelines(scores.data[1:])
sset.samples = None
......@@ -61,10 +61,10 @@ class DummyDatabase:
for i in range(offset, offset + n_samples)
]
def _create_random_sample_set(self, n_sample_set=10, n_samples=2):
def _create_random_sample_set(self, n_sample_set=10, n_samples=2, seed=10):
# Just generate random samples
np.random.seed(10)
np.random.seed(seed)
sample_set = [
SampleSet(
samples=[],
......@@ -89,21 +89,30 @@ class DummyDatabase:
return sample_set
def background_model_samples(self):
    """
    Return a flat list with the samples of every sample set generated by
    ``_create_random_sample_set(seed=10)``.
    """
    sample_sets = self._create_random_sample_set(seed=10)
    # Flatten lazily; no intermediate list of lists and no ``*`` unpacking.
    return list(itertools.chain.from_iterable(sset.samples for sset in sample_sets))
def references(self):
return self._create_random_sample_set(self.n_references, self.dim)
return self._create_random_sample_set(self.n_references, self.dim, seed=11)
def probes(self):
    """
    Return 10 probe sample sets with one sample each (seed 12).

    Every probe gets a ``references`` list holding the string form of the
    indices ``0 .. n_references - 1``.
    """
    probes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=12)
    for p in probes:
        # A fresh list per probe, so the probes never share one mutable list.
        p.references = [str(r) for r in range(self.n_references)]

    return probes
def zprobes(self):
    """
    Return 10 z-probe sample sets with one sample each (seed 13).

    Every z-probe gets a ``references`` list holding the string form of the
    indices ``0 .. n_references - 1``.
    """
    zprobes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=13)
    for p in zprobes:
        # A fresh list per z-probe, so they never share one mutable list.
        p.references = [str(r) for r in range(self.n_references)]

    return zprobes
@property
def allow_scoring_with_all_biometric_references(self):
return True
......@@ -140,7 +149,7 @@ def test_on_memory():
with tempfile.TemporaryDirectory() as dir_name:
def run_pipeline(with_dask):
def run_pipeline(with_dask, allow_scoring_with_all_biometric_references):
database = DummyDatabase()
transformer = _make_transformer(dir_name)
......@@ -160,23 +169,24 @@ def test_on_memory():
database.background_model_samples(),
database.references(),
database.probes(),
allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references,
allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
)
if with_dask:
scores = scores.compute(scheduler="single-threaded")
assert len(scores) == 10
for probe_ssets in scores:
for probe in probe_ssets:
assert len(probe) == 10
for sample_scores in scores:
assert len(sample_scores) == 10
for score in sample_scores:
assert isinstance(score.data, float)
run_pipeline(False)
run_pipeline(False) # Testing checkpoint
run_pipeline(False, True)
run_pipeline(False, False) # Testing checkpoint
shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch
os.makedirs(dir_name, exist_ok=True)
run_pipeline(True)
run_pipeline(True) # Testing checkpoint
run_pipeline(True, True)
run_pipeline(True, True) # Testing checkpoint
def test_checkpoint_bioalg_as_transformer():
......@@ -207,6 +217,7 @@ def test_checkpoint_bioalg_as_transformer():
database.probes(),
allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references,
)
if with_dask:
scores = scores.compute(scheduler="single-threaded")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment