Commit 6dd7344c authored by Rakesh MEHTA

tests and modifications to local features

parent 1b6e8eed
......@@ -89,6 +89,8 @@ setup(
'console_scripts': [
'mnist_binary_all.py = xbob.boosting.scripts.mnist_binary_all:main',
'mnist_binary_one.py = xbob.boosting.scripts.mnist_binary_one:main',
'mnist_multi.py = xbob.boosting.scripts.mnist_multi:main',
'mnist_lbp.py = xbob.boosting.scripts.mnist_lbp:main',
],
# tests that are _exported_ (that can be executed by other packages) can
......
import losses
import trainers
import boosting
\ No newline at end of file
......@@ -18,31 +18,27 @@ Loss Function: Exponential Loss (Preferred with the StumpTrainer)
Log Loss (Preferred with LutTrainer)
Tangent Loss
The module structure is the following:
- The "BoostTrainer" base class implements the
"""
import numpy as np
import numpy
import math
from pylab import *
from scipy import optimize
"""Exponential loss function """
class ExpLossFunction():
def update_loss(self, targets, scores):
return exp(-(targets * scores))
"""The function computes the exponential loss values using prediction scores and targets."""
return numpy.exp(-(targets * scores))
#return loss
def update_loss_grad(self, targets, scores):
loss = exp(-(targets * scores))
loss = numpy.exp(-(targets * scores))
return -targets * loss
#return loss_grad
......@@ -53,7 +49,7 @@ class ExpLossFunction():
weak_scores = args[3]
curr_scores_x = pred_scores + x*weak_scores
loss = self.update_loss(targets, curr_scores_x)
sum_l = np.sum(loss,0)
sum_l = numpy.sum(loss,0)
return sum_l
#@abstractmethod
......@@ -64,7 +60,7 @@ class ExpLossFunction():
weak_scores = args[3]
curr_scores_x = pred_scores + x*weak_scores
loss_grad = self.update_loss_grad(targets, curr_scores_x)
sum_g = np.sum(loss_grad*weak_scores, 0)
sum_g = numpy.sum(loss_grad*weak_scores, 0)
return sum_g
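The exponential loss above is simple enough to sanity-check by hand. A minimal, illustrative sketch (the targets and scores are made up, not taken from the package):

import numpy

# exponential loss l(y, f) = exp(-y * f) and its gradient dl/df = -y * exp(-y * f),
# mirroring update_loss / update_loss_grad above
targets = numpy.array([+1., -1., +1.])
scores  = numpy.array([ 0.5,  0.2, -1.0])
loss = numpy.exp(-(targets * scores))   # well-classified samples get loss < 1
grad = -targets * loss                  # gradient with respect to the scores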
......@@ -73,12 +69,12 @@ class ExpLossFunction():
"""Log loss function """
class LogLossFunction():
def update_loss(self, targets, scores):
e = exp(-(targets * scores))
return log(1 + e)
e = numpy.exp(-(targets * scores))
return numpy.log(1 + e)
#return loss
def update_loss_grad(self, targets, scores):
e = exp(-(targets * scores))
e = numpy.exp(-(targets * scores))
denom = 1/(1 + e)
return - targets* e* denom
......@@ -89,7 +85,7 @@ class LogLossFunction():
weak_scores = args[3]
curr_scores_x = pred_scores + x*weak_scores
loss = self.update_loss(targets, curr_scores_x)
sum_l = np.sum(loss,0)
sum_l = numpy.sum(loss,0)
return sum_l
#@abstractmethod
......@@ -100,7 +96,7 @@ class LogLossFunction():
weak_scores = args[3]
curr_scores_x = pred_scores + x*weak_scores
loss_grad = self.update_loss_grad( targets, curr_scores_x)
sum_g = np.sum(loss_grad*weak_scores, 0)
sum_g = numpy.sum(loss_grad*weak_scores, 0)
return sum_g
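A quick way to convince oneself that the log-loss gradient above is consistent is a finite-difference check. Sketch only; it assumes the LogLossFunction class defined above is in scope and uses made-up values:

import numpy

# finite-difference check of d/df log(1 + exp(-y*f)) against update_loss_grad
loss_ = LogLossFunction()
y = numpy.array([1., -1.])
f = numpy.array([0.3, 0.7])
eps = 1e-6
numeric = (loss_.update_loss(y, f + eps) - loss_.update_loss(y, f - eps)) / (2 * eps)
analytic = loss_.update_loss_grad(y, f)
assert numpy.allclose(numeric, analytic, atol=1e-5)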
......@@ -116,12 +112,12 @@ class LogLossFunction():
class TangLossFunction():
def update_loss(self, targets, scores):
loss = (2* np.arctan(targets * scores) -1)**2
loss = (2* numpy.arctan(targets * scores) -1)**2
return loss
def update_loss_grad(self, targets, scores):
m = targets*scores
numer = 4*(2*np.arctan(m) -1)
numer = 4*(2*numpy.arctan(m) -1)
denom = 1 + m**2
loss_grad = numer/denom
return loss_grad
......@@ -133,7 +129,7 @@ class TangLossFunction():
weak_scores = args[3]
curr_scores_x = pred_scores + x*weak_scores
loss = self.update_loss(targets, curr_scores_x)
return np.sum(loss, 0)
return numpy.sum(loss, 0)
#@abstractmethod
def loss_grad_sum(self, *args):
......@@ -143,7 +139,7 @@ class TangLossFunction():
weak_scores = args[3]
curr_scores_x = pred_scores + x*weak_scores
loss_grad = self.update_loss_grad( targets, curr_scores_x)
return np.sum(loss_grad*weak_scores, 0)
return numpy.sum(loss_grad*weak_scores, 0)
......@@ -182,9 +178,9 @@ class StumpTrainer():
# Initialization
numSamp, numFea = fea.shape
th = np.zeros([numFea])
p = np.zeros([numFea])
g = np.zeros([numFea])
th = numpy.zeros([numFea])
p = numpy.zeros([numFea])
g = numpy.zeros([numFea])
# For each feature find the optimum threshold, polarity and the gain
for i in range(numFea):
......@@ -219,18 +215,18 @@ class StumpTrainer():
num_samp = f.shape[0]
# Sort the feature values and rearrange the corresponding loss gradients
sorted_id = np.argsort(f)
sorted_id = numpy.argsort(f)
f = f[sorted_id]
loss_grad = loss_grad[sorted_id]
# For all thresholds compute the dot product
grad_cs = np.cumsum(loss_grad)
grad_cs = numpy.cumsum(loss_grad)
grad_sum = grad_cs[-1]
g = (grad_sum - grad_cs)
# Find the index that maximizes the dot product
opt_id = np.argmax(np.absolute(g))
g_opt = np.absolute(g[opt_id])
opt_id = numpy.argmax(numpy.absolute(g))
g_opt = numpy.absolute(g[opt_id])
# Find the corresponding threshold value
th = 0.0
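The hunk above is cut off before the threshold itself is set, so the following is only a hedged, standalone restatement: the search sorts one feature, accumulates the loss gradient, and picks the split with the largest absolute gradient mass above it. The halfway threshold and the sign-based polarity below are assumptions for illustration, not necessarily the package's exact choices:

import numpy

def stump_threshold(f, loss_grad):
    # sort the feature values and rearrange the corresponding loss gradients
    order = numpy.argsort(f)
    f, loss_grad = f[order], loss_grad[order]
    # gradient mass above each candidate split
    grad_cs = numpy.cumsum(loss_grad)
    g = grad_cs[-1] - grad_cs
    opt_id = numpy.argmax(numpy.absolute(g))
    # assumed choices: threshold halfway between neighbouring sorted values,
    # polarity taken from the sign of the optimal gradient sum
    th = 0.5 * (f[opt_id] + f[min(opt_id + 1, len(f) - 1)])
    return th, numpy.sign(g[opt_id])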
......@@ -259,7 +255,7 @@ class StumpTrainer():
def get_weak_scores(self,test_features):
# Initialize the values
numSamp = test_features.shape[0]
weak_scores = np.ones([numSamp,1])
weak_scores = numpy.ones([numSamp,1])
# Select feature corresponding to the specific index
weak_features = test_features[:,self.selected_indices]
......@@ -282,9 +278,9 @@ class LutTrainer():
def __init__(self, num_entries, selection_type, num_op):
self.num_entries = num_entries
self.luts = np.ones((num_entries, num_op), dtype = np.int)
self.luts = numpy.ones((num_entries, num_op), dtype = numpy.int)
self.selection_type = selection_type
self.selected_indices = np.zeros([num_op,1], 'int16')
self.selected_indices = numpy.zeros([num_op,1], 'int16')
""" The function to learn the weak LutTrainer. """
......@@ -293,7 +289,7 @@ class LutTrainer():
# Initializations
num_op = loss_grad.shape[1]
fea_grad = np.zeros([self.num_entries,num_op])
fea_grad = numpy.zeros([self.num_entries,num_op])
# Compute the sum of the gradient based on the feature values or the loss associated with each
# feature index
......@@ -308,7 +304,7 @@ class LutTrainer():
# indep (independent) feature selection is used if each output dimension uses a different feature;
# each selected feature minimizes one dimension of the loss function
selected_indices = [np.argmin(col) for col in np.transpose(sum_loss)]
selected_indices = [numpy.argmin(col) for col in numpy.transpose(sum_loss)]
for oi in range(num_op):
curr_id = sum_loss[:,oi].argmin()
......@@ -321,9 +317,9 @@ class LutTrainer():
# for 'shared' feature selection the loss function is summed over all output dimensions and
# the feature that minimizes this accumulated loss is used for all the outputs
accum_loss = np.sum(sum_loss,1)
accum_loss = numpy.sum(sum_loss,1)
selected_findex = accum_loss.argmin()
self.selected_indices = selected_findex*np.ones([num_op,1],'int16')
self.selected_indices = selected_findex*numpy.ones([num_op,1],'int16')
for oi in range(num_op):
fea_grad[:,oi] = self.compute_hgrad(loss_grad[:,oi],fea[:,selected_findex])
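To make the difference between the two selection modes concrete, a small sketch on a made-up (num_features x num_outputs) loss matrix:

import numpy

sum_loss = numpy.random.rand(100, 10)        # made-up per-feature, per-output losses
indep_ids = sum_loss.argmin(axis=0)          # 'indep': one feature index per output
shared_id = sum_loss.sum(axis=1).argmin()    # 'shared': one index reused for every output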
......@@ -347,7 +343,7 @@ class LutTrainer():
num_fea = len(fea[0])
num_samp = len(fea)
num_op = len(loss_grad[0])
sum_loss = np.zeros([num_fea,num_op])
sum_loss = numpy.zeros([num_fea,num_op])
# Compute the loss for each feature
for fi in range(num_fea):
......@@ -370,7 +366,7 @@ class LutTrainer():
def compute_hgrad(self, loss_grado,fval):
# initialize the values
num_samp = len(fval)
hist_grad = np.zeros([self.num_entries])
hist_grad = numpy.zeros([self.num_entries])
# compute the sum of the gradient
for hi in range(self.num_entries):
......@@ -386,10 +382,10 @@ class LutTrainer():
def get_weak_scores(self, fset):
num_samp = len(fset)
num_op = len(self.luts[0])
weak_scores = np.zeros([num_samp,num_op])
weak_scores = numpy.zeros([num_samp,num_op])
for oi in range(num_op):
a = self.luts[fset[:,self.selected_indices[oi]],oi]
weak_scores[:,oi] = np.transpose(self.luts[fset[:,self.selected_indices[oi]],oi])
weak_scores[:,oi] = numpy.transpose(self.luts[fset[:,self.selected_indices[oi]],oi])
return weak_scores
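The LUT scoring above reduces to fancy indexing; a sketch with made-up numbers (single output, 8 possible feature values, 5 samples):

import numpy

lut = numpy.array([[-1], [-1], [1], [1], [1], [-1], [1], [-1]])  # (num_entries, num_op)
fset = numpy.array([[3, 0], [7, 2], [1, 5], [4, 4], [6, 1]])     # discrete feature values
selected_index = 0                                               # column chosen by the trainer
weak_scores = lut[fset[:, selected_index], 0]                    # one score per sample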
......@@ -410,6 +406,7 @@ class BoostMachine():
def add_weak_trainer(self, curr_trainer, curr_alpha):
self.alpha.append(curr_alpha)
print curr_alpha
self.weak_trainer.append(curr_trainer)
......@@ -418,8 +415,8 @@ class BoostMachine():
# Initialization
num_trainer = len(self.weak_trainer)
num_samp = test_features.shape[0]
pred_labels = np.ones([num_samp, self.num_op])
pred_scores = np.zeros([num_samp, self.num_op])
pred_labels = -numpy.ones([num_samp, self.num_op])
pred_scores = numpy.zeros([num_samp, self.num_op])
# For each round of boosting calculate the weak scores for that round and add to the total
......@@ -428,7 +425,12 @@ class BoostMachine():
weak_scores = curr_trainer.get_weak_scores(test_features)
pred_scores = pred_scores + self.alpha[i] * weak_scores
pred_labels[pred_scores <=0] = -1
# predict the labels for the test features from the score sign (binary case) or the maximum score (multivariate case)
if(self.num_op == 1):
pred_labels[pred_scores >=0] = 1
else:
score_max = numpy.argmax(pred_scores, axis = 1)
pred_labels[range(num_samp),score_max] = 1
return pred_labels
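The multivariate branch of classify() assigns +1 only to the highest-scoring column; a sketch with made-up scores for three samples and four classes:

import numpy

pred_scores = numpy.array([[ 0.2, -0.1, 0.9, 0.0],
                           [-0.4,  0.3, 0.1, 0.2],
                           [ 0.0,  0.0, 0.0, 0.5]])
num_samp, num_op = pred_scores.shape
pred_labels = -numpy.ones([num_samp, num_op])
score_max = numpy.argmax(pred_scores, axis = 1)
pred_labels[range(num_samp), score_max] = 1      # each row becomes [-1, ..., +1, ..., -1]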
......@@ -512,12 +514,12 @@ class Boost:
# Initializations
if(len(targets.shape) == 1):
targets = targets[:,np.newaxis]
targets = targets[:,numpy.newaxis]
num_op = targets.shape[1]
machine = BoostMachine(num_op)
num_samp = fset.shape[0]
pred_scores = np.zeros([num_samp,num_op])
pred_scores = numpy.zeros([num_samp,num_op])
loss_class = LOSS_FUNCTIONS[self.loss_type]
loss_ = loss_class()
......@@ -548,7 +550,7 @@ class Boost:
# Initialize the start point for lbfgs minimization
f0 = np.zeros(num_op)
f0 = numpy.zeros(num_op)
# Perform lbfgs minimization and compute the scale (alpha_r) for current weak trainer
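The optimizer call itself lies outside this hunk; the following is only a hedged sketch of how scipy's L-BFGS-B routine could consume loss_sum and loss_grad_sum (single-output case, so the summed loss reduces to one value; all names are assumed):

import numpy
from scipy import optimize

def find_alpha(loss_, targets, pred_scores, weak_scores, num_op):
    # start point as initialised above; loss_sum / loss_grad_sum receive
    # (x, targets, pred_scores, weak_scores) through *args
    f0 = numpy.zeros(num_op)
    alpha, _, _ = optimize.fmin_l_bfgs_b(loss_.loss_sum, f0,
                                         fprime=loss_.loss_grad_sum,
                                         args=(targets, pred_scores, weak_scores))
    return alpha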
......
......@@ -5,61 +5,52 @@ import random
#import weaklearner
from pylab import *
def compute_blkimgs(img,Cx,Cy):
img_rows = img.shape[0]
img_cols = img.shape[1]
blk_imgs = []
for cx in range(Cx):
for cy in range(Cy):
print 'cx %d cy %d' % (cx,cy)
num_rows = img_rows - (cy)
num_cols = img_cols - (cx)
curr_img = np.zeros([num_rows,num_cols])
for i in range(cx+1):
for j in range(cy+1):
print i
print j
curr_img = curr_img + img[i:i+num_rows,j:j+num_cols]
blk_imgs.append(curr_img)
return blk_imgs
def test_func():
a = np.ones([20,24])
b = compute_mlbp(a,6,7)
def integral_img(img):
int1 = cumsum(img,0)
int2 = cumsum(int1,1)
return int2
def compute_mlbp(img,cx,cy):
int_imgc = integral_img(img)
rows = img.shape[0]
cols = img.shape[1]
int_img = np.zeros([rows+1,cols+1])
int_img[1:,1:] = int_imgc
num_neighbours = 8
for isx in range(cx):
for isy in range(cy):
sx = isx +1
sy = isy +1
blk_int = int_img[sy:,sx:] + int_img[0:-sy,0:-sx] - int_img[sy:,0:-sx] - int_img[0:-sy,sx:]
blk_dimy, blk_dimx = blk_int.shape
#fmap_dimx = blk_dimx - 2*sx
#fmap_dimy = blk_dimy - 2*sy
fmap_dimx = blk_dimx - 2
fmap_dimy = blk_dimy - 2
fmap = np.zeros([fmap_dimy,fmap_dimx])
#coord = [[0,0],[0,sx],[0,2*sx],[sy,2*sx],[2*sy,2*sx],[2*sy,sx],[2*sy,0],[sy,0]]
coord = [[0,0],[0,1],[0,2],[1,2],[2,2],[2,1],[2,0],[1,0]]
#blk_center = blk_int[sy:sy+fmap_dimy,sx:sx+fmap_dimx]
blk_center = blk_int[1:1+fmap_dimy,1:1+fmap_dimx]
for ind in range(num_neighbours):
fmap = fmap + (2**ind)*(blk_int[coord[ind][0]:coord[ind][0] + fmap_dimy,coord[ind][1]:coord[ind][1] + fmap_dimx]>= blk_center)
return fmap
class mlbp():
def __init__(self, ftype):
self.ftype = ftype
def test_func():
a = np.ones([20,24])
b = compute_mlbp(a,6,7)
def integral_img(self,img):
int1 = cumsum(img,0)
int2 = cumsum(int1,1)
return int2
def compute_mlbp(self,img,cx,cy):
# Initializations
int_imgc = self.integral_img(img)
rows, cols = img.shape
int_img = np.zeros([rows+1,cols+1])
int_img[1:,1:] = int_imgc
num_neighbours = 8
fvec = np.empty(0, dtype = 'uint8')
for isx in range(cx):
for isy in range(cy):
sx = isx +1
sy = isy +1
blk_int = int_img[sy:,sx:] + int_img[0:-sy,0:-sx] - int_img[sy:,0:-sx] - int_img[0:-sy,sx:]
blk_dimy, blk_dimx = blk_int.shape
fmap_dimx = blk_dimx - 2
fmap_dimy = blk_dimy - 2
fmap = np.zeros([fmap_dimy,fmap_dimx])
coord = [[0,0],[0,1],[0,2],[1,2],[2,2],[2,1],[2,0],[1,0]]
blk_center = blk_int[1:1+fmap_dimy,1:1+fmap_dimx]
for ind in range(num_neighbours):
if(self.ftype == 'lbp'):
fmap = fmap + (2**ind)*(blk_int[coord[ind][0]:coord[ind][0] + fmap_dimy,coord[ind][1]:coord[ind][1] + fmap_dimx]>= blk_center)
elif(self.ftype == 'tlbp'):
comp_img = blk_int[coord[(ind+1)%8][0]:coord[(ind+1)%8][0] + fmap_dimy,coord[(ind+1)%8][1]:coord[(ind+1)%8][1] + fmap_dimx]
fmap = fmap + (2**ind)*(blk_int[coord[ind][0]:coord[ind][0] + fmap_dimy,coord[ind][1]:coord[ind][1] + fmap_dimx]>= comp_img)
vec = np.reshape(fmap,fmap.shape[0]*fmap.shape[1],1)
fvec = np.hstack((fvec,vec))
return fvec
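A sketch of how the mlbp extractor above might be driven (the block counts and the image are made up; it assumes the mlbp class is importable):

import numpy

img = numpy.random.randint(0, 256, (28, 28))   # MNIST-sized dummy image
extractor = mlbp('lbp')                        # or 'tlbp' for the transitional variant
features = extractor.compute_mlbp(img, 4, 4)   # feature vector over all block sizes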
......
......@@ -15,7 +15,7 @@ import numpy
import sys, getopt
import argparse
import string
from ..core import booster
from ..core import boosting
import xbob.db.mnist
def main():
......@@ -57,7 +57,7 @@ def main():
# Initialize the trainer with 'LutTrainer' or 'StumpTrainer'
boost_trainer = booster.Boost(args.trainer_type)
boost_trainer = boosting.Boost(args.trainer_type)
# Set the parameters for the boosting
boost_trainer.num_rnds = args.num_rnds
......
......@@ -4,7 +4,8 @@
The MNIST data is exported using the xbob.db.mnist module, which provides the train and test
partitions for the digits. Pixel values of greyscale images are used as features and the
available algorithms for classification are Lut based Boosting and Stump based Boosting.
The script test digits provided by the command line. Thus it conducts only one binary classifcation test.
Thus it conducts only one binary classification test.
"""
......@@ -14,7 +15,7 @@ import numpy
import sys, getopt
import string
import argparse
from ..core import booster
from ..core import boosting
import xbob.db.mnist
def main():
......@@ -53,7 +54,7 @@ def main():
# Initialize the trainer with 'LutTrainer' or 'StumpTrainer'
boost_trainer = booster.Boost(args.trainer_type)
boost_trainer = boosting.Boost(args.trainer_type)
# Set the parameters for the boosting
boost_trainer.num_rnds = args.num_rnds
......
......@@ -11,30 +11,34 @@
import xbob.db.mnist
import numpy
import numpy as np
import sys, getopt
import argparse
from ..core import booster
import xbob.db.mnist
import string
from ..core import boosting
import matplotlib.pyplot as mpl
def main():
parser = argparse.ArgumentParser(description = " The arguments for the boosting. ")
parser.add_argument('-t', default = 'StumpTrainer',dest = "trainer_type", type = string, choices = {'StumpTrainer', 'LutTrainer'}, help = "This is the type of trainer used for the boosting." )
parser.add_argument('-r', default = 20, dest = "num_rnds", type = string , help = "The number of round for the boosting")
parser.add_argument('-l', default = 'exp', dest = "loss_type", type= string,choices = {'log','exp'} help = "The type of the loss function. Logit and Exponential functions are the avaliable options")
parser.add_argument('-s', default = 'indep', dest = "selection_type", choices = {'indep', 'shared'}, type = string, help = "The feature selection type for the LUT based trainer. For multivarite case the features can be selected by sharing or independently ")
parser.add_argument('-t', default = 'LutTrainer',dest = "trainer_type", type = str, choices = {'StumpTrainer', 'LutTrainer'}, help = "This is the type of trainer used for the boosting." )
parser.add_argument('-r', default = 20, dest = "num_rnds", type = int, help = "The number of rounds for the boosting")
parser.add_argument('-l', default = 'exp', dest = "loss_type", type= str, choices = {'log','exp'}, help = "The type of the loss function. Logit and Exponential functions are the available options")
parser.add_argument('-s', default = 'indep', dest = "selection_type", choices = {'indep', 'shared'}, type = str, help = "The feature selection type for the LUT based trainer. For the multivariate case the features can be selected by sharing or independently ")
parser.add_argument('-n', default = 256, dest = "num_entries", type = int, help = "The number of entries in the LookUp table. It is the range of the feature values, e.g. if LBP features are used this value is 256.")
args = parser.parse_args()
# download the dataset
db_object = xbob.db.mnist.Database()
# Hardcode the number of digits
num_digits = 10
# get the data (features and labels) for the selected digits from the xbob_db_mnist class functions
fea_train, label_train = db_object.data('train',labels = range(10))
fea_test, label_test = db_object.data('test', labels = range(10))
fea_train, label_train = db_object.data('train',labels = range(num_digits))
fea_test, label_test = db_object.data('test', labels = range(num_digits))
# Format the label data into int and change the class labels to -1 and +1
......@@ -42,16 +46,16 @@ def main():
label_test = label_test.astype(int)
# initialize the label data for multivariate case
train_targets = -np.ones([fea_tr.shape[0],10])
test_targets = -np.ones([fea_ts.shape[0],10])
train_targets = -np.ones([fea_train.shape[0],num_digits])
test_targets = -np.ones([fea_test.shape[0],num_digits])
for i in range(10):
train_targets[label_tr == i,i] = 1
test_targets[label_ts == i,i] = 1
for i in range(num_digits):
train_targets[label_train == i,i] = 1
test_targets[label_test == i,i] = 1
# Initialize the trainer with 'LutTrainer' or 'StumpTrainer'
boost_trainer = booster.Boost(args.trainer_type)
boost_trainer = boosting.Boost(args.trainer_type)
# Set the parameters for the boosting
boost_trainer.num_rnds = args.num_rnds
......@@ -60,16 +64,47 @@ def main():
boost_trainer.num_entries = args.num_entries
# Perform boosting of the training feature set (fea_train)
print fea_train.shape
print train_targets.shape
machine = boost_trainer.train(fea_train, train_targets)
# Classify the test samples (fea_test) using the boosted classifier generated above
prediction_labels = machine.classify(fea_test)
# Calculate the values for the confusion matrix
score = np.zeros([10,10])
for i in range(num_digits):
prediction_i = prediction_labels[test_targets[:,i] == 1,:]
print prediction_i.shape
for j in range(num_digits):
score[i,j] = sum(prediction_i[:,j] == 1)
np.savetxt('conf_mat.out', score, delimiter=',')
cm = score/np.sum(score,1)[:,np.newaxis]
res = mpl.imshow(cm, cmap=mpl.cm.summer, interpolation='nearest')
for x in np.arange(cm.shape[0]):
for y in np.arange(cm.shape[1]):
col = 'white'
if cm[x,y] > 0.5: col = 'black'
mpl.annotate('%.2f' % (100*cm[x,y],), xy=(y,x), color=col,
fontsize=8, horizontalalignment='center', verticalalignment='center')
classes = [str(k) for k in range(10)]
mpl.xticks(np.arange(10), classes)
mpl.yticks(np.arange(10), classes, rotation=90)
mpl.ylabel("(Your prediction)")
mpl.xlabel("(Real class)")
mpl.title("Confusion Matrix (test set) - in %")
mpl.show()
# Calculate the accuracy in percentage for the current classification test
accuracy = 100*float(sum(np.sum(prediction_labels == test_targets,1) == num_op))/float(prediction_labels.shape[0])
accuracy = 100*float(sum(np.sum(prediction_labels == test_targets,1) == num_digits))/float(prediction_labels.shape[0])
print "The accuracy of binary classification test for digits %d and %d is %f " % (digit1, digit2, accuracy)
print "The average accuracy of classification is %f " % (accuracy)
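The nested loops that build the confusion matrix above can also be written as a single matrix product; a sketch assuming the test_targets and prediction_labels arrays from this script (rows are true digits, columns are predicted digits):

import numpy

score = numpy.dot((test_targets == 1).T.astype(float),
                  (prediction_labels == 1).astype(float))
cm = score / numpy.sum(score, 1)[:, numpy.newaxis]   # row-normalised rates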
......
import xbob.db.mnist
import numpy as np
import DummyBoost
import boostMachine
import booster
num_train_samples = 10000
accu = 0
......@@ -48,10 +47,10 @@ label_ts[label_ts == digit2] = -1
#label_ts = label_ts[:,np.newaxis]
boost_trainer = DummyBoost.Boost('LutTrainer')
boost_trainer = booster.Boost('LutTrainer')
# Set the parameters for the boosting
boost_trainer.num_rnds = 20 # The number of rounds in boosting
boost_trainer.num_rnds = 1 # The number of rounds in boosting
boost_trainer.bl_type = 'exp' # Type of base loss function l(y,f(x)); it can take one of these values ('exp', 'log', 'symexp', 'symlog')
boost_trainer.s_type = 'indep' # It can be 'indep' or 'shared'; for details check the cosim thesis
boost_trainer.num_entries = 256 # The number of entries in the LUT, it is the range of the discrete features
...