Commit 02b51788 authored by Manuel Günther

Added first algorithm (PCA) and extractor (Linearize)

parent faad4b36
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Guenther <Manuel.Guenther@idiap.ch>
import bob.learn.linear
import bob.io.base
import numpy
import scipy.spatial
from .Algorithm import Algorithm
import logging
logger = logging.getLogger("bob.bio.base")
class PCA (Algorithm):
  """Tool for computing eigenfaces"""

  def __init__(
      self,
      subspace_dimension,  # if int, number of subspace dimensions; if float, percentage of variance to keep
      distance_function = scipy.spatial.distance.euclidean,
      is_distance_function = True,
      uses_variances = False,
      **kwargs  # parameters directly sent to the base class
  ):
    """Initializes the PCA Algorithm with the given setup"""
    # call base class constructor and register that the tool performs a projection
    Algorithm.__init__(
        self,
        performs_projection = True,
        subspace_dimension = subspace_dimension,
        distance_function = str(distance_function),
        is_distance_function = is_distance_function,
        uses_variances = uses_variances,
        **kwargs
    )

    self.m_subspace_dim = subspace_dimension
    self.m_machine = None
    self.m_distance_function = distance_function
    self.m_factor = -1. if is_distance_function else 1.
    self.m_uses_variances = uses_variances
  def train_projector(self, training_features, projector_file):
    """Trains the PCA projection matrix and writes it to the given projector file"""
    # Initializes the data
    data = numpy.vstack([feature.flatten() for feature in training_features])

    logger.info(" -> Training LinearMachine using PCA")
    t = bob.learn.linear.PCATrainer()
    self.m_machine, self.m_variances = t.train(data)
    # For re-shaping, we need to copy...
    self.m_variances = self.m_variances.copy()

    # compute variance percentage, if desired
    if isinstance(self.m_subspace_dim, float):
      cummulated = numpy.cumsum(self.m_variances) / numpy.sum(self.m_variances)
      for index in range(len(cummulated)):
        if cummulated[index] > self.m_subspace_dim:
          self.m_subspace_dim = index
          break
      self.m_subspace_dim = index
      logger.info(" ... Keeping %d PCA dimensions", self.m_subspace_dim)

    # re-shape machine
    self.m_machine.resize(self.m_machine.shape[0], self.m_subspace_dim)
    self.m_variances.resize(self.m_subspace_dim)

    f = bob.io.base.HDF5File(projector_file, "w")
    f.set("Eigenvalues", self.m_variances)
    f.create_group("Machine")
    f.cd("/Machine")
    self.m_machine.save(f)
  def load_projector(self, projector_file):
    """Reads the PCA projection matrix from file"""
    # read PCA projector
    f = bob.io.base.HDF5File(projector_file)
    self.m_variances = f.read("Eigenvalues")
    f.cd("/Machine")
    self.m_machine = bob.learn.linear.Machine(f)
    # Allocates an array for the projected data
    self.m_projected_feature = numpy.ndarray(self.m_machine.shape[1], numpy.float64)

  def project(self, feature):
    """Projects the data using the stored projection matrix"""
    # Projects the data
    self.m_machine(feature, self.m_projected_feature)
    # return the projected data
    return self.m_projected_feature
  def enroll(self, enroll_features):
    """Enrolls the model by storing all given input vectors"""
    assert len(enroll_features)
    # just store all the features in a 2D array, one feature per row
    model = numpy.zeros((len(enroll_features), enroll_features[0].shape[0]), numpy.float64)
    for n, feature in enumerate(enroll_features):
      model[n,:] += feature[:]
    # return enrolled model
    return model
  def score(self, model, probe):
    """Computes the distance of the model to the probe using the distance function taken from the config file"""
    # return the negative distance (as a similarity measure)
    if len(model.shape) == 2:
      # we have multiple models, so we use the multiple model scoring
      return self.score_for_multiple_models(model, probe)
    elif self.m_uses_variances:
      # single model, single probe (multiple probes have already been handled)
      return self.m_factor * self.m_distance_function(model, probe, self.m_variances)
    else:
      # single model, single probe (multiple probes have already been handled)
      return self.m_factor * self.m_distance_function(model, probe)
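
Before the package imports and configuration files that follow, here is a minimal usage sketch of the PCA class above, run outside of a full bob.bio pipeline. It is not part of this commit: the random data, the feature sizes and the temporary file are illustrative assumptions; only the methods defined above are called.

import os
import tempfile
import numpy
from bob.bio.base.algorithm import PCA

numpy.random.seed(42)

# illustrative training set: 20 samples with 100 features each (sizes are assumptions)
training_features = [numpy.random.randn(100) for _ in range(20)]

pca = PCA(subspace_dimension = 5)

# train the projector and write it to a temporary HDF5 file
projector_file = os.path.join(tempfile.mkdtemp(), "Projector.hdf5")
pca.train_projector(training_features, projector_file)

# re-load the projector, then project enrollment and probe features
# (project() re-uses an internal buffer, hence the copies)
pca.load_projector(projector_file)
enroll_features = [pca.project(numpy.random.randn(100)).copy() for _ in range(3)]
probe = pca.project(numpy.random.randn(100)).copy()

# enroll a model (one row per feature) and score one of its rows against the probe;
# with the defaults this is the negative Euclidean distance
model = pca.enroll(enroll_features)
print(pca.score(model[0], probe))
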
from .Algorithm import Algorithm
from .PCA import PCA
#!/usr/bin/env python
import bob.bio.base
import scipy.spatial
algorithm = bob.bio.base.algorithm.PCA(
  subspace_dimension = 30,
  distance_function = scipy.spatial.distance.euclidean,
  is_distance_function = True
)
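
The configuration above keeps a fixed number of 30 eigenvectors. As noted in the constructor comment of the PCA class, subspace_dimension may also be given as a float, in which case it is interpreted as the fraction of total variance to keep. A hypothetical variant of this configuration file, not part of the commit, could therefore look as follows (the 0.95 threshold and the cosine distance are arbitrary illustrations):

#!/usr/bin/env python
import bob.bio.base
import scipy.spatial

# keep as many PCA dimensions as needed to explain 95% of the training variance,
# and score with the (negated) cosine distance instead of the Euclidean distance
algorithm = bob.bio.base.algorithm.PCA(
  subspace_dimension = 0.95,
  distance_function = scipy.spatial.distance.cosine,
  is_distance_function = True
)
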
#!/usr/bin/env python
import bob.bio.base
# Linearization of the data to a vector, no data type specified
extractor = bob.bio.base.extractor.Linearize()
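
The test further below retrieves this configuration with bob.bio.base.load_resource("linearize", "extractor"). For such a lookup, the configuration file has to be registered as a setuptools entry point of the package. The fragment below sketches what that registration could look like; the entry-point group names, module paths and version number are assumptions following the usual bob.bio layout and are not taken from this commit:

# hypothetical setup.py fragment -- names and paths are assumptions, not part of this commit
from setuptools import setup

setup(
  name = 'bob.bio.base',
  version = '0.0.1',  # illustrative version number
  packages = ['bob', 'bob.bio', 'bob.bio.base'],
  entry_points = {
    'bob.bio.extractor': [
      'linearize = bob.bio.base.config.extractor.linearize:extractor',
    ],
    'bob.bio.algorithm': [
      'pca = bob.bio.base.config.algorithm.pca:algorithm',
    ],
  },
)
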
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Fri Oct 26 17:05:40 CEST 2012
#
# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from .Extractor import Extractor
import numpy
class Linearize (Extractor):
  """Extracts features by simply concatenating all pixels of the data into one long vector"""

  def __init__(self, dtype=None):
    """If the ``dtype`` parameter is given, it specifies the data type that is enforced for the features."""
    Extractor.__init__(self, dtype = dtype)
    self.dtype = dtype

  def __call__(self, data):
    """Takes data of arbitrary dimensions and linearizes it into a 1D vector, enforcing the data type if desired"""
    linear = numpy.reshape(data, data.size)
    if self.dtype is not None:
      linear = linear.astype(self.dtype)
    return linear
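
As a quick illustration of the extractor above (not part of the commit; the array contents are arbitrary): flattening keeps the input data type unless a dtype is requested explicitly.

import numpy
from bob.bio.base.extractor import Linearize

image = numpy.arange(12, dtype = numpy.uint8).reshape(3, 4)  # arbitrary 2D "image"

# default: the pixels are concatenated into one long vector, the dtype is kept
flat = Linearize()(image)
assert flat.shape == (12,) and flat.dtype == numpy.uint8

# with dtype given, the flattened vector is additionally cast, here to float64
flat = Linearize(dtype = numpy.float64)(image)
assert flat.dtype == numpy.float64
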
@@ -5,3 +5,4 @@
"""Basic features for biometric recognition"""
from .Extractor import Extractor
from .Linearize import Linearize
import bob.bio.base
import bob.io.base.test_utils
import os
import numpy
from . import utils
def test_linearize():
  # load extractor
  extractor = bob.bio.base.load_resource("linearize", "extractor")

  # generate input
  data = utils.random_training_set((10,10), 1)[0]
  assert len(data.shape) == 2

  # extract features
  extracted = extractor(data)
  assert len(extracted.shape) == 1
  assert extracted.shape[0] == data.shape[0] * data.shape[1]
  assert extracted.dtype == data.dtype

  # test IO
  filename = bob.io.base.test_utils.temporary_filename()
  try:
    extractor.write_feature(extracted, filename)
    extracted2 = extractor.read_feature(filename)
    assert (extracted == extracted2).all()
  finally:
    os.remove(filename)

  # extract with dtype
  extractor = bob.bio.base.extractor.Linearize(dtype=numpy.complex128)
  extracted = extractor(data)
  assert len(extracted.shape) == 1
  assert extracted.shape[0] == data.shape[0] * data.shape[1]
  assert extracted.dtype == numpy.complex128