Commit 3ecc3a08 authored by Rakesh MEHTA

Comments added and modifications for univariate boosting

parent 9a419ab0
......@@ -41,16 +41,6 @@ class Boost:
Parameters:
num_rnds: Type int, Default = 100
The number of rounds of boosting. The boosting strategies implemented here
(GradBoost and TaylorBoost) are fairly robust to overfitting, so a large
number of rounds generally results in a small error rate.
loss_type: Type string, Default = 'log'
The type of loss function to be optimized. Currently the following classes of
loss functions are supported:
'log', 'exp' and 'tang'.
The 'exp' loss is preferred with the StumpTrainer and 'log' with the LutTrainer.
trainer_type: Type string, Default = 'stump'
The type of weak trainer to be learned. Two types of weak trainers are
......@@ -64,6 +54,19 @@ class Boost:
used as the optimization strategy. It can be used with both discrete
and continuous types of features.
num_rnds: Type int, Default = 100
The number of rounds of boosting. The boosting strategies implemented here
(GradBoost and TaylorBoost) are fairly robust to overfitting, so a large
number of rounds generally results in a small error rate.
loss_type: Type string, Default = 'log'
The type of loss function to be optimized. Currently the following classes of
loss functions are supported:
'log' and 'exp' (a brief sketch of both losses follows this docstring).
The 'exp' loss is preferred with the StumpTrainer and 'log' with the LutTrainer.
num_entries: Type int, Default = 256
This is the parameter for the LutTrainer. It is the
number of entries in the LookUp table. It can be determined from the range of
......@@ -71,11 +74,6 @@ class Boost:
LookUp table is 256.
lut_loss: Type string, Default = 'expectational'
For the LutTrainer two types of loss function are supported: expectational and variational.
The variational loss performs marginally better than the expectational loss, as reported in Cosmin's
thesis, but at the expense of higher computational complexity.
This parameter can be set to 'expectational' or 'variational'.
lut_selection: Type string, Default = 'indep'
......@@ -84,6 +82,23 @@ class Boost:
For feature sharing set the parameter to 'shared' and for independent selection set it to
'indep'. See Cosmin's thesis for a detailed explanation of the feature selection types.
For univariate cases such as face detection this parameter is not relevant.
Example Usage:
# Initialize the boosting parameters
num_rounds = 50
feature_range = 256
loss_type = 'log'
selection_type = 'indep'
boost_trainer = boosting.Boost('LutTrainer', num_rounds, feature_range, loss_type, selection_type)
# Train machine using training samples
machine = boost_trainer.train(train_fea, train_targets)
# Classify the samples using boosted classifier
prediction_labels = machine.classify(test_fea)
"""
......@@ -133,12 +148,21 @@ class Boost:
as a combination of weak classifiers.
Inputs:
fset: (num_sam x num_features) features extracted from the samples
features should be discrete
Type: numpy array
fset: Features extracted from the samples.
Features should be discrete for the LutTrainer.
Type: numpy array (num_sam x num_features)
labels: class labels of the samples of dimension (#samples x #outputs)
Type: numpy array
labels: Class labels of the samples.
Type: numpy array
Shape for binary classification: (#samples)
Shape for multivariate classification: (#samples x #outputs)
Example for the four-class case (classes 0,1,2,3) and three samples
(see the target-construction sketch below):
[[0,1,0,0],   # class label is 1
 [1,0,0,0],   # class label is 0
 [0,0,0,1]]   # class label is 3
There can be only a single 1 in each row, and the index of that 1 indicates the class.
Return:
machine: The boosting machine that is a combination of the weak classifiers.
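A minimal sketch (not part of this commit), following the MNIST example script further down, of how multivariate training targets can be built from integer class labels; variable names are illustrative:

import numpy

num_classes = 4
int_labels = numpy.array([1, 0, 3])                  # integer class labels for three samples
train_targets = -numpy.ones((int_labels.shape[0], num_classes))
for i in range(num_classes):
    train_targets[int_labels == i, i] = 1            # +1 for the true class, -1 elsewhere
# train_targets is now [[-1, 1, -1, -1], [1, -1, -1, -1], [-1, -1, -1, 1]]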
......@@ -166,8 +190,8 @@ class Boost:
weak_trainer = trainers.LutTrainer(self.num_entries, self.lut_selection, num_op )
elif self.weak_trainer_type == 'StumpTrainer':
weak_trainer = trainers.StumpTrainer()
elif self.weak_trainer_type == 'GaussTrainer':
weak_trainer = trainers.GaussianTrainer(3)
#elif self.weak_trainer_type == 'GaussTrainer':
# weak_trainer = trainers.GaussianTrainer(3)
# Start boosting iterations for num_rnds rounds
......@@ -246,8 +270,20 @@ class BoostMachine():
Return:
prediction_labels: The predicted classes for the test samples
Type: numpy array (#number of samples)
prediction_labels: The predicted classes for the test samples. It is a binary numpy array where
1 indicates the predicted class (a small example of recovering integer labels from this format follows below).
Type: numpy array
Shape for binary classification: (#samples)
Shape for multivariate classification: (#samples x #outputs)
Example for the four-class case (classes 0,1,2,3) and three test samples:
[[0,1,0,0],   # predicted class is 1
 [1,0,0,0],   # predicted class is 0
 [0,0,0,1]]   # predicted class is 3
There can be only a single 1 in each row, and the index of that 1 indicates the predicted class.
"""
# Initialization
num_trainer = len(self.weak_trainer)
......@@ -265,6 +301,7 @@ class BoostMachine():
# predict the labels for test features based on score sign (for binary case) and score value (multivariate case)
if(self.num_op == 1):
pred_labels[pred_scores >=0] = 1
pred_labels = numpy.squeeze(pred_labels)
else:
score_max = numpy.argmax(pred_scores, axis = 1)
pred_labels[range(num_samp),score_max] = 1
......
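A small sketch (not part of this commit) of turning the one-hot prediction matrix returned by classify back into integer class labels and scoring it; the arrays here are dummies standing in for the real outputs:

import numpy

prediction_labels = numpy.array([[0, 1, 0, 0],   # one-hot rows as returned by machine.classify
                                 [1, 0, 0, 0],
                                 [0, 0, 0, 1]])
test_targets = numpy.array([[-1, 1, -1, -1],     # +1 marks the true class, as in the MNIST example below
                            [-1, -1, 1, -1],
                            [-1, -1, -1, 1]])

predicted_classes = numpy.argmax(prediction_labels, axis=1)        # [1, 0, 3]
true_classes = numpy.argmax(test_targets, axis=1)                  # [1, 2, 3]
accuracy = 100.0 * numpy.mean(predicted_classes == true_classes)   # 66.67 for this toy case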
......@@ -355,7 +355,7 @@ class LutTrainer():
# compute the sum of the gradient
hist_grad, bin_val = numpy.histogram(features,bins = self.num_entries, range = (0,self.num_entries-1), weights = loss_grado)
hist_grad, bin_val = numpy.histogram(features, bins = self.num_entries, range = (0,self.num_entries-1), weights = loss_grado)
# hist_grad = [sum(loss_grado[features == feature_value]) for feature_value in xrange(self.num_entries)]
#for feature_value in range(self.num_entries):
# hist_grad[feature_value] = sum(loss_grado[features == feature_value])
......
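For clarity (not part of this commit), the weighted histogram above sums, for every discrete feature value, the loss gradients of the samples taking that value; a tiny self-contained check of the equivalence with the commented-out loop:

import numpy

num_entries = 4
features = numpy.array([0, 2, 2, 1, 0])
loss_grado = numpy.array([0.5, -1.0, 0.25, 2.0, -0.5])

hist_grad, bin_val = numpy.histogram(features, bins=num_entries, range=(0, num_entries - 1), weights=loss_grado)
loop_grad = numpy.array([loss_grado[features == v].sum() for v in range(num_entries)])
# both hist_grad and loop_grad equal [0.0, 2.0, -0.75, 0.0]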
#!/usr/bin/env python
"""The test script to perform the multivariate classification on the digits from the MNIST dataset.
The MNIST data is exported using the xbob.db.mnist module which provide the train and test
partitions for the digits. LBP features are extracted and the available algorithms for
classification is Lut based Boosting.
"""
import xbob.db.mnist
import numpy
import sys, getopt
import argparse
import string
import bob
from ..util import confusion
from ..features import local_feature
from ..core import boosting
import matplotlib.pyplot
def main():
parser = argparse.ArgumentParser(description = " The arguments for the boosting. ")
parser.add_argument('-r', default = 20, dest = "num_rnds", type = int, help = "The number of rounds of boosting")
parser.add_argument('-l', default = 'exp', dest = "loss_type", type = str, choices = {'log','exp'}, help = "The type of loss function. Logit and exponential losses are the available options")
parser.add_argument('-s', default = 'indep', dest = "selection_type", choices = {'indep', 'shared'}, type = str, help = "The feature selection type for the LUT based trainer. For the multivariate case the features can be selected either shared or independently")
parser.add_argument('-n', default = 256, dest = "num_entries", type = int, help = "The number of entries in the LookUp table. It is the range of the feature values, e.g. if LBP features are used this value is 256.")
parser.add_argument('-f', default = 'lbp', dest = "feature_type", type = str, choices = {'lbp', 'mlbp', 'tlbp', 'dlbp'}, help = "The type of LBP features to be extracted from the image to perform the classification. The features are extracted from blocks of varying scales")
parser.add_argument('-d', default = 10, dest = "num_digits", type = int, help = "The number of digits to be considered for classification.")
args = parser.parse_args()
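A possible command-line invocation with the options above (the script filename here is assumed, since it is not shown in this excerpt):

#   python mnist_multi_lbp.py -r 20 -l log -s indep -n 256 -f lbp -d 10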
# download the dataset
db_object = xbob.db.mnist.Database()
# Read the number of digits from the command line and hardcode the image size
num_digits = args.num_digits
img_size = 28
# get the data (features and labels) for the selected digits from the xbob_db_mnist class functions
train_img, label_train = db_object.data('train',labels = range(num_digits))
test_img, label_test = db_object.data('test', labels = range(num_digits))
# Format the label data into int and change the class labels to -1 and +1
label_train = label_train.astype(int)
label_test = label_test.astype(int)
# initialize the label data for multivariate case
train_targets = -numpy.ones([train_img.shape[0],num_digits])
test_targets = -numpy.ones([test_img.shape[0],num_digits])
for i in range(num_digits):
train_targets[label_train == i,i] = 1
test_targets[label_test == i,i] = 1
# Extract the lbp features from the images
lbp_extractor = bob.ip.LBP(8)
temp_img = train_img[0,:].reshape([img_size,img_size])
output_image_size = lbp_extractor.get_lbp_shape(temp_img)
feature_dimension = output_image_size[0]*output_image_size[1]
train_fea = numpy.zeros((train_img.shape[0], feature_dimension))
test_fea = numpy.zeros((test_img.shape[0], feature_dimension))
for i in range(train_img.shape[0]):
current_img = train_img[i,:].reshape([img_size,img_size])
lbp_output_image = numpy.ndarray ( output_image_size, dtype = numpy.uint16 )
lbp_extractor (current_img, lbp_output_image)
train_fea[i,:] = numpy.reshape(lbp_output_image, feature_dimension, 1)
for i in range(test_img.shape[0]):
current_img = test_img[i,:].reshape([img_size,img_size])
lbp_output_image = numpy.ndarray ( output_image_size, dtype = numpy.uint16 )
lbp_extractor (current_img, lbp_output_image)
test_fea[i,:] = numpy.reshape(lbp_output_image, feature_dimension, 1)
train_fea = train_fea.astype(numpy.uint8)
test_fea = test_fea.astype(numpy.uint8)
print "LBP features computed"
# Initialize the trainer with the LutTrainer
boost_trainer = boosting.Boost('LutTrainer')
# Set the parameters for the boosting
boost_trainer.num_rnds = args.num_rnds
boost_trainer.loss_type = args.loss_type
boost_trainer.selection_type = args.selection_type
boost_trainer.num_entries = args.num_entries
# Perform boosting on the training feature set
machine = boost_trainer.train(train_fea, train_targets)
# Classify the test samples using the boosted classifier generated above
prediction_labels = machine.classify(test_fea)
# Calculate the values for the confusion matrix
confusion_matrix = numpy.zeros([num_digits,num_digits])
for i in range(num_digits):
prediction_i = prediction_labels[test_targets[:,i] == 1,:]
num_samples_i = prediction_i.shape[0]
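The remainder of the confusion-matrix computation is cut off by this excerpt; one possible way to fill it, assuming columns index the true class and rows the predicted class (illustrative only):

# predicted class for the samples whose true class is i
predicted_class_i = numpy.argmax(prediction_i, axis = 1)
for j in range(num_digits):
    confusion_matrix[j, i] = 100.0 * float(numpy.sum(predicted_class_i == j)) / num_samples_i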
......@@ -73,7 +73,7 @@ def main():
prediction_labels = machine.classify(fea_test)
# calculate the accuracy in percentage for the current classification test
label_test = label_test[:,numpy.newaxis]
# label_test = label_test[:,numpy.newaxis]
accuracy = 100*float(sum(prediction_labels == label_test))/(len(label_test))
print "The accuracy of binary classification test for digits %d and %d is %f " % (digit1, digit2, accuracy)
accu = accu + accuracy
......
......@@ -73,7 +73,7 @@ def main():
prediction_labels = machine.classify(fea_test)
# calculate the accuracy in percentage for the current classification test
label_test = label_test[:,numpy.newaxis]
#label_test = label_test[:,numpy.newaxis]
accuracy = 100*float(sum(prediction_labels == label_test))/(len(label_test))
print "The accuracy of binary classification test with digits %d and %d is %f " % (digit1, digit2, accuracy)
......
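A brief self-contained illustration (not part of this commit) of why the shape of label_test matters in the accuracy computation above: with mismatched shapes the comparison broadcasts instead of matching element-wise:

import numpy

prediction_labels = numpy.array([1, -1, 1])      # shape (3,), as produced after numpy.squeeze
label_test = numpy.array([1, 1, -1])             # shape (3,)

(prediction_labels == label_test).shape                     # (3,)  element-wise comparison, as intended
(prediction_labels == label_test[:, numpy.newaxis]).shape   # (3, 3) broadcast comparison, which would distort the accuracy sum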