Skip to content
Snippets Groups Projects
Commit d03dc86d authored by Amir MOHAMMADI's avatar Amir MOHAMMADI
Browse files

stable API hopefuly

parent 475aa543
No related branches found
No related tags found
No related merge requests found
......@@ -15,9 +15,8 @@ class Algorithm(object):
"""docstring for Algorithm"""
def __init__(self,
performs_training=False,
has_closed_form_solution=False,
preprocessors=None,
classifier=None,
*args,
**kwargs
):
......@@ -29,8 +28,7 @@ class Algorithm(object):
"""
super(Algorithm, self).__init__()
self.performs_training = performs_training
self.has_closed_form_solution = has_closed_form_solution
self.classifier = classifier
self.preprocessors = preprocessors
self._kwargs = kwargs
self._kwargs['preprocessors'] = preprocessors
......@@ -47,21 +45,43 @@ class Algorithm(object):
return scores
def train(self, train, devel=None):
if devel is None:
devel = train
(negatives, positives) = train
train_scores = np.vstack((negatives, positives))
neg_len = negatives.shape[0]
y = np.zeros((train_scores.shape[0],), dtype='bool')
y[neg_len:] = True
self.fit(train_scores, y)
self.classifier.fit(train_scores, y)
def fuse(self, scores):
return self.decision_function(scores)
if hasattr(self, 'classifier'):
return self.classifier.decision_function(scores)
else:
return self.decision_function(scores)
def __str__(self):
"""__str__() -> info
def plot_boundary_decision(self, score_labels, threshold,
label_system1='',
label_system2='',
This function returns all parameters of this class (and its derived class).
**Returns:**
info : str
A string containing the full information of all parameters of this
(and the derived) class.
"""
return "%s(%s)" % (str(self.__class__), ", ".join(
["%s=%s" % (key, value) for key, value in
self._kwargs.items() if value is not None]))
def save(self, model_file):
with open(model_file, "wb") as f:
pickle.dump(self, f)
def load(self, model_file):
with open(model_file, "rb") as f:
return pickle.load(f)
def plot_boundary_decision(self, scores, score_labels, threshold,
thres_system1=None,
thres_system2=None,
do_grouping=False,
......@@ -77,15 +97,16 @@ class Algorithm(object):
'''
Plots the boundary decision of the Algorithm
@param score_labels numpy.array A (self.scores.shape[0]) array containing
the true labels of self.scores.
@param score_labels numpy.array A (scores.shape[0]) array containing
the true labels of scores.
@param threshold float threshold of the decision boundary
'''
if legends is None:
legends = ['Impostor', 'Genuine']
markers = ['x', 'o']
if self.scores.shape[1] > 2:
if scores.shape[1] > 2:
raise NotImplementedError(
"Currently plotting the decision boundary for more than two systems "
"is not supported.")
......@@ -93,28 +114,21 @@ class Algorithm(object):
import matplotlib.pyplot as plt
plt.gca() # this is necessary for subplots to work.
X = self.scores[:, [i1, i2]]
X = scores[:, [i1, i2]]
Y = score_labels
x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad
y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad
h1 = abs(x_max - x_min) / resolution
h2 = abs(y_max - y_min) / resolution
if self.has_closed_form_solution and self.scores.shape[1] == 2:
x1 = np.arange(x_min, x_max, h1)
x2 = self.closed_form(x1, threshold)
plt.plot(x1, x2, cmap=plt.cm.viridis)
else:
xx, yy = np.meshgrid(
np.arange(x_min, x_max, h1), np.arange(y_min, y_max, h2))
scores = self.scores
self.scores = np.c_[xx.ravel(), yy.ravel()]
Z = (self() > threshold).reshape(xx.shape)
self.scores = scores
xx, yy = np.meshgrid(
np.linspace(x_min, x_max, resolution),
np.linspace(y_min, y_max, resolution))
temp = np.c_[xx.ravel(), yy.ravel()]
temp = self.preprocess(temp)
Z = (self.fuse(temp) > threshold).reshape(xx.shape)
contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)
contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)
if do_grouping:
positives, negatives = X[Y], X[np.logical_not(Y)]
negatives, positives = X[np.logical_not(Y)], X[Y]
negatives, positives = grouping(negatives, positives, **kwargs)
X = np.concatenate((negatives, positives), axis=0)
Y = np.concatenate(
......@@ -122,35 +136,19 @@ class Algorithm(object):
np.ones(positives.shape[0], dtype=np.bool8)),
axis=0)
plt.scatter(
X[:, 0], X[:, 1], c=Y, alpha=alpha, cmap=plt.cm.viridis)
# plt.legend(legends)
negatives, positives = X[np.logical_not(Y)], X[Y]
colors = plt.cm.viridis(np.linspace(0, 1, 2))
for i, X in enumerate((negatives, positives)):
plt.scatter(
X[:, 0], X[:, 1], marker=markers[i], alpha=alpha,
c=colors[i], label=legends[i])
plt.legend()
if thres_system1 is not None:
plt.axvline(thres_system1, color='red')
plt.axhline(thres_system2, color='red')
return contourf
def __str__(self):
"""__str__() -> info
This function returns all parameters of this class (and its derived class).
**Returns:**
info : str
A string containing the full information of all parameters of this
(and the derived) class.
"""
return "%s(%s)" % (str(self.__class__), ", ".join(
["%s=%s" % (key, value) for key, value in
self._kwargs.items() if value is not None]))
def save(self, model_file):
with open(model_file, "wb") as f:
pickle.dump(self, f)
plt.xlim([x_min, x_max])
plt.ylim([y_min, y_max])
def load(self, model_file):
with open(model_file, "rb") as f:
return pickle.load(f)
return contourf
#!/usr/bin/env python
from __future__ import division
from __future__ import absolute_import
import bob.learn.linear
from sklearn.linear_model import LogisticRegression as LogisticRegression_SK
from .Algorithm import Algorithm
import bob.core
logger = bob.core.log.setup("bob.fusion.base")
class LogisticRegression(Algorithm, LogisticRegression_SK):
__doc__ = LogisticRegression_SK.__doc__
def __init__(self,
*args, **kwargs):
Algorithm.__init__(
self, performs_training=True,
has_closed_form_solution=True, *args, **kwargs)
sk_kwargs = {}
for key, value in kwargs.items():
if key in ['penalty', 'dual', 'tol', 'C', 'fit_intercept',
'intercept_scaling', 'class_weight',
'random_state', 'solver', 'max_iter',
'multi_class', 'verbose', 'warm_start', 'n_jobs']:
sk_kwargs[key] = value
LogisticRegression_SK.__init__(self, **sk_kwargs)
def closed_form(self, x1, y):
w1 = self.coef_[0]
w2 = self.coef_[1]
x2 = (y - self.intercept_ - x1*w1)/w2
return x2
......@@ -6,7 +6,7 @@ from __future__ import absolute_import
import bob.learn.mlp
import bob.core.random
import bob.io.base
import numpy
import pickle
from .Algorithm import Algorithm
from .mlp_train_helper import MLPTrainer
......@@ -23,27 +23,19 @@ class MLP(Algorithm):
def __init__(self,
n_systems=2,
hidden_layers=None,
trainer_devel=None,
seed=None,
machine=None,
trainer=None,
*args, **kwargs):
# chicken and egg :D call __init__ twice.
super(MLP, self).__init__(performs_training=True, *args, **kwargs)
super(MLP, self).__init__(
classifier=self,
*args, **kwargs)
if hidden_layers is None:
hidden_layers = [3]
if self.scores is not None:
n_systems = numpy.asarray(self.scores).shape[1]
self.mlp_shape = [n_systems] + hidden_layers + [1]
super(MLP, self).__init__(
performs_training=True, mlp_shape=self.mlp_shape, seed=seed,
machine=str(machine), trainer=str(trainer),
*args, **kwargs)
self.seed = seed
self.machine = machine
self.trainer = trainer
self.trainer_devel = trainer_devel if trainer_devel else \
self.trainer_scores
self._my_kwargs = kwargs
self.initialize()
......@@ -59,20 +51,16 @@ class MLP(Algorithm):
bob.learn.mlp.RProp(1, bob.learn.mlp.SquareError(
self.machine.output_activation), machine=self.machine,
train_biases=False)
self._kwargs = {
'seed': self.seed,
'mlp_shape': self.mlp_shape,
'machine': self.machine,
'train': self.train,
}
def prepare_train(self):
self.trainer_devel = self.trainer_devel if self.trainer_devel else \
self.trainer_scores
self.train_helper = MLPTrainer(
train=self.trainer_scores[::-1],
devel=self.trainer_devel[::-1],
mlp_shape=self.mlp_shape,
machine=self.machine,
trainer=self.trainer,
**self._my_kwargs)
def fit(self, train_scores, y):
n_systems = train_scores.shape[1]
def prepare_train(self, train, devel):
(negatives, positives) = train
n_systems = negatives.shape[1]
if n_systems != self.mlp_shape[0]:
logger.warn(
'Reinitializing the MLP machine with the shape of {} to {} to match th'
......@@ -81,8 +69,18 @@ class MLP(Algorithm):
self.n_systems = n_systems
self.hidden_layers = self.mlp_shape[1:-1]
self.initialize(force=True)
self.trainer_scores = (train_scores[numpy.logical_not(y)], train_scores[y])
self.prepare_train()
self.train_helper = MLPTrainer(
train=train[::-1],
devel=devel[::-1],
mlp_shape=self.mlp_shape,
machine=self.machine,
trainer=self.trainer,
**self._my_kwargs)
def train(self, train, devel=None):
if devel is None:
devel = train
self.prepare_train(train, devel)
self.machine, self.analyzer = self.train_helper()
def decision_function(self, scores):
......@@ -91,16 +89,30 @@ class MLP(Algorithm):
scores = scores.ravel()
return scores
def _get_hdf5_file(self, model_file):
return model_file[:-3] + 'hdf5'
def save(self, model_file):
d5 = bob.io.base.HDF5File(model_file, "w")
d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file), "w")
try:
self.machine.save(d5)
finally:
d5.close()
# dump preprocessors in a pickle file because
# we don't know how they look like
with open(model_file, 'wb') as f:
pickle.dump(self.preprocessors, f)
def load(self, model_file):
d5 = bob.io.base.HDF5File(model_file)
d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file))
try:
self.machine.load(d5)
finally:
d5.close()
# load preprocessors
with open(model_file, "rb") as f:
self.preprocessors = pickle.load(f)
return self
......@@ -16,8 +16,9 @@ class Weighted_Sum(Algorithm):
def __init__(self, weights=None, *args, **kwargs):
super(Weighted_Sum, self).__init__(
performs_training=False, weights=weights,
has_closed_form_solution=True, *args, **kwargs)
classifier=self,
weights=weights,
*args, **kwargs)
self.weights = weights
def fit(self, X, y):
......@@ -30,4 +31,10 @@ class Weighted_Sum(Algorithm):
return numpy.sum(scores * self.weights, axis=1)
def closed_form(self, x1, y):
return 2*y - x1
if self.weights is None:
return 2*y - x1
else:
w1 = self.weights[0]
w2 = self.weights[1]
x2 = (y - x1*w1)/w2
return x2
from .Algorithm import Algorithm
from .Weighted_Sum import Weighted_Sum
from .LogisticRegression import LogisticRegression
from .MLP import MLP
# gets sphinx autodoc done right - don't remove it
......
#!/usr/bin/env python
import bob.fusion.base
import sklearn.preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
algorithm = bob.fusion.base.algorithm.LogisticRegression(
preprocessors=[(sklearn.preprocessing.RobustScaler(), False)])
algorithm = bob.fusion.base.algorithm.Algorithm(
preprocessors=[StandardScaler()],
classifier=LogisticRegression())
#!/usr/bin/env python
import bob.fusion.base
import sklearn.preprocessing
from sklearn.preprocessing import StandardScaler
algorithm = bob.fusion.base.algorithm.MLP(
preprocessors=[(sklearn.preprocessing.RobustScaler(), False)])
preprocessors=[StandardScaler()])
#!/usr/bin/env python
import bob.fusion.base
import sklearn.preprocessing
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
algorithm = bob.fusion.base.algorithm.LogisticRegression(
preprocessors=[(sklearn.preprocessing.RobustScaler(), False),
(sklearn.preprocessing.PolynomialFeatures(degree=2), False)])
algorithm = bob.fusion.base.algorithm.Algorithm(
preprocessors=[StandardScaler(), PolynomialFeatures(degree=2)],
classifier=LogisticRegression())
from . import fuse
from . import plot_fusion_decision_boundary
......@@ -9,7 +9,7 @@ import numpy as np
from bob.io.base import create_directories_safe
from bob.measure.load import load_score, get_all_scores,\
get_negatives_positives_all
get_negatives_positives_all, dump_score
from bob.bio.base import utils
from ..tools import parse_arguments, write_info
......@@ -21,10 +21,6 @@ logger = bob.core.log.setup("bob.fusion.base")
def fuse(args, command_line_parameters):
"""Do the actual fusion."""
algorithm = args.algorithm
if args.score_type == 4:
fmt = '%s %s %s %.6f'
else:
fmt = '%s %s %s %s %.6f'
write_info(args, command_line_parameters)
......@@ -53,15 +49,23 @@ def fuse(args, command_line_parameters):
assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
# train the preprocessors
algorithm.train_preprocessors(scores_dev)
# preprocess data
scores_dev = algorithm.preprocess(scores_dev)
scores_eval = algorithm.preprocess(scores_eval)
neg, pos = trainer_scores
neg, pos = algorithm.preprocess(neg), algorithm.preprocess(pos)
trainer_scores = (neg, pos)
# train the model
if utils.check_file(args.model_file, args.force, 1000):
logger.info(
"- Fusion: model '%s' already exists.", args.model_file)
algorithm = algorithm.load(args.model_file)
algorithm.trainer_scores = trainer_scores
elif algorithm.performs_training:
algorithm.trainer_scores = trainer_scores
algorithm.train()
else:
algorithm.train(trainer_scores)
algorithm.save(args.model_file)
# fuse the scores (dev)
......@@ -69,12 +73,11 @@ def fuse(args, command_line_parameters):
logger.info(
"- Fusion: scores '%s' already exists.", args.fused_dev_file)
else:
algorithm.scores = scores_dev
fused_scores_dev = algorithm()
score_lines = np.array(score_lines_list_dev[0])
fused_scores_dev = algorithm.fuse(scores_dev)
score_lines = score_lines_list_dev[0]
score_lines['score'] = fused_scores_dev
create_directories_safe(os.path.dirname(args.fused_dev_file))
np.savetxt(args.fused_dev_file, score_lines, fmt=fmt)
dump_score(args.fused_dev_file, score_lines)
# fuse the scores (eval)
if args.eval_files:
......@@ -82,12 +85,11 @@ def fuse(args, command_line_parameters):
logger.info(
"- Fusion: scores '%s' already exists.", args.fused_eval_file)
else:
algorithm.scores = scores_eval
fused_scores_eval = algorithm()
score_lines = np.array(score_lines_list_eval[0])
fused_scores_eval = algorithm.fuse(scores_eval)
score_lines = score_lines_list_eval[0]
score_lines['score'] = fused_scores_eval
create_directories_safe(os.path.dirname(args.fused_eval_file))
np.savetxt(args.fused_eval_file, score_lines, fmt=fmt)
dump_score(args.fused_eval_file, score_lines)
def main(command_line_parameters=None):
......
#!/usr/bin/env python
"""Plot decision boundraries of the fusion algorithm.
Usage:
plot_fusion_decision_boundary.py SCORE_FILE SCORE_FILE MODEL_FILE
[-v... | --verbose...] [options]
plot_fusion_decision_boundary.py (-h | --help)
plot_fusion_decision_boundary.py (-V | --version)
Options:
-o, --output PLOT_FILE The path to save the plot. [default: scatter.pdf]
--score-type {4,5} The format the scores are provided. [default: 4]
-v, --verbose Increase the verbosity level from 0 (only error
messages) to 1 (warnings), 2 (log messages), 3 (debug
information) by adding the --verbose option as often
as desired (e.g. '-vvv' for debug). [default: 0]
-a, --algorithm Algorithm The fusion that was used during fusion if they
implement a different load method e.g.
bob.fusion.base.algorithm.MLP.
[default: bob.fusion.base.algorithm.Algorithm]
-g, --group N If given scores will be grouped into N samples.
[default: 500]
--grouping {random, kmeans} The gouping algorithm used. [default: kmeans]
-h --help Show this screen.
-V, --version Show version.
"""
from docopt import docopt
import matplotlib.pyplot as plt
import numpy
import bob.fusion.base
import bob.core
from bob.measure.load import load_score, get_negatives_positives,\
get_all_scores
from bob.measure import eer_threshold
logger = bob.core.log.setup("bob.fusion.base")
def main(command_line_parameters=None):
args = docopt(__doc__, argv=command_line_parameters,
version=bob.fusion.base.get_config())
print(args)
bob.core.log.set_verbosity_level(logger, args['--verbose'])
# load the algorithm
algorithm = eval('{}()'.format(args['--algorithm']))
algorithm = algorithm.load(args['MODEL_FILE'])
# load the scores
score_lines_list = [
load_score(path, int(args['--score-type'])) for path in args['SCORE_FILE']]
scores = get_all_scores(score_lines_list)
score_lines = numpy.array(score_lines_list[0])
score_lines['score'] = algorithm.fuse(algorithm.preprocess(scores))
threshold = eer_threshold(*get_negatives_positives(score_lines))
score_labels = score_lines['claimed_id'] == score_lines['real_id']
# plot the decision boundary
algorithm.plot_boundary_decision(
scores, score_labels, threshold,
do_grouping=True,
npoints=int(args['--group']),
seed=0,
gformat=args['--grouping']
)
plt.savefig(args['--output'])
plt.close()
if __name__ == '__main__':
main()
......@@ -3,8 +3,9 @@ numpy
bob.core
bob.extension
bob.measure
bob.learn.linear
bob.learn.em
bob.learn.activation
bob.learn.mlp
bob.bio.base
scikit-learn
matplotlib # for plotting
docopt # for plotting script
\ No newline at end of file
......@@ -102,6 +102,7 @@ setup(
# scripts should be declared using this entry:
'console_scripts': [
'fuse.py = bob.fusion.base.script.fuse:main',
'plot_fusion_decision_boundary.py = bob.fusion.base.script.plot_fusion_decision_boundary:main',
],
'bob.fusion.algorithm': [
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment