Commit 10f8d7c2 authored by André Anjos

Remove legacy files from Iris Flower dataset

parent 41342c2b
Showing changes with 1 addition and 1053 deletions
include LICENSE README.rst bootstrap.py buildout.cfg
recursive-include doc conf.py *.rst
recursive-include xbob/db/test/data *.lst
include xbob/db/iris/iris.data xbob/db/iris/iris.names
recursive-include xbob/db/base/data *.lst
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Tue Jul 19 11:50:08 2011 +0200
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
"""This example shows how to use the Iris Flower (Fisher's) Dataset to create
a 3-class classifier based on Neural Networks (Multi-Layer Perceptrons - MLP).
"""
from __future__ import print_function
import os
import sys
import xbob.io
import xbob.db
import xbob.measure
import xbob.machine
import xbob.trainer
import optparse
import tempfile #for package tests
import numpy
def choose_matplotlib_iteractive_backend():
"""Little logic to get interactive plotting right on OSX and Linux"""
import platform
import matplotlib
if platform.system().lower() == 'darwin': #we are on OSX
matplotlib.use('macosx')
else:
matplotlib.use('GTKAgg')
def generate_testdata(data, target):
"""Concatenates all data in a single 2D array. Examples are encoded row-wise,
features, column-wise. The same for the targets.
"""
destsize = 0
for d in data: destsize += len(d)
retval = numpy.zeros((destsize, 4), 'float64')
t_retval = numpy.zeros((destsize, target[0].shape[0]), 'float64')
retval.fill(0)
cur = 0
for k, d in enumerate(data):
retval[cur:(cur+len(d)),:] = numpy.vstack(d)
for i in range(len(d)):
t_retval[i+cur,:] = target[k]
cur += len(d)
return retval, t_retval
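# Example of the resulting shapes (hypothetical call, matching the three
# 50-sample Iris classes used below): generate_testdata(datalist, targets)
# returns a (150, 4) data matrix and a (150, 3) target matrix, with the rows
# of each class stored contiguously.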
def create_machine(data, training_steps):
"""Creates the machine given the training data"""
mlp = xbob.machine.MLP((4, 4, len(data)))
mlp.hidden_activation = xbob.machine.HyperbolicTangentActivation()
mlp.output_activation = xbob.machine.HyperbolicTangentActivation()
mlp.randomize() #reset weights and biases to a value between -0.1 and +0.1
BATCH = 50
trainer = xbob.trainer.MLPBackPropTrainer(BATCH, xbob.trainer.SquareError(mlp.output_activation), mlp)
trainer.trainBiases = True #this is the default, but just to clarify!
trainer.momentum = 0.1 #some momentum
targets = [ #we choose the approximate Fisher response!
numpy.array([+1., -1., -1.]), #setosa
numpy.array([-1., +1., -1.]), #versicolor
numpy.array([-1., -1., +1.]), #virginica
]
# Associate the data with the targets by setting the arrayset order explicitly
datalist = [data['setosa'], data['versicolor'], data['virginica']]
# All data, as 2 x 2D arrays containing data and targets
AllData, AllTargets = generate_testdata(datalist, targets)
# A helper to select and shuffle the data
S = xbob.trainer.DataShuffler(datalist, targets)
# We now iterate for several steps, looking for convergence
retval = [xbob.machine.MLP(mlp)]
for k in range(training_steps):
input, target = S(BATCH)
# We use "train_" which is unchecked and faster. Use train() if you want
# checks! See the MLPBackPropTrainer documentation for details on this
# before choosing the wrong approach.
trainer.train_(mlp, input, target)
print("|RMSE| @%d:" % (k,), end=' ')
print(numpy.linalg.norm(xbob.measure.rmse(mlp(AllData), AllTargets)))
retval.append(xbob.machine.MLP(mlp))
return retval #all machines => nice plotting!
def process_data(machine, data):
"""Iterates over classes and passes data through the trained machine"""
output = {}
for cl in data.keys():
output[cl]=machine.forward(data[cl])
return output
def plot(output):
"""Plots each of the outputs, with the classes separated by colors.
"""
import matplotlib.pyplot as mpl
histo = [{}, {}, {}]
for k in output.keys():
for i in range(len(histo)):
histo[i][k] = numpy.vstack(output[k])[:,i]
order = ['setosa', 'versicolor', 'virginica']
color = ['green', 'blue', 'red']
FAR = []
FRR = []
THRES = []
# Calculates separability
for i, O in enumerate(order):
positives = histo[i][O].copy() #make it C-style contiguous
negatives = numpy.hstack([histo[i][k] for k in order if k != O])
# note: threshold a posteriori! (don't do this at home, kids ;-)
thres = xbob.measure.eer_threshold(negatives, positives)
far, frr = xbob.measure.farfrr(negatives, positives, thres)
FAR.append(far)
FRR.append(frr)
THRES.append(thres)
# Plots the class histograms
plot_counter = 0
for O, C in zip(order, color):
for k in range(len(histo)):
plot_counter += 1
mpl.subplot(len(histo), len(order), plot_counter)
mpl.hist(histo[k][O], bins=20, color=C, range=(-1,+1), label=O.capitalize(), alpha=0.5)
mpl.vlines((THRES[k],), 0, 60, colors=('red',), linestyles=('--',))
mpl.axis([-1.1,+1.1,0,60])
mpl.grid(True)
if k == 0: mpl.ylabel("Data %s" % O.capitalize())
if O == order[-1]: mpl.xlabel("Output %s" % order[k].capitalize())
if O == order[0]: mpl.title("EER = %.1f%%" % (100*(FAR[k] + FRR[k])/2))
def fig2bzarray(fig):
"""
@brief Convert a Matplotlib figure to a 3D blitz array with RGB channels and
return it
@param fig a matplotlib figure
@return a blitz 3D array of RGB values
"""
import numpy
# draw the renderer
fig.canvas.draw()
# Get the RGB buffer from the figure, re-shape it adequately
w,h = fig.canvas.get_width_height()
buf = numpy.fromstring(fig.canvas.tostring_rgb(),dtype=numpy.uint8)
buf.shape = (h,w,3)
buf = numpy.transpose(buf, (2,0,1))
return numpy.array(buf)
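# Note: numpy.fromstring() on binary buffers (as used in fig2bzarray above) is
# deprecated in modern numpy releases; numpy.frombuffer() accepts the same
# arguments and is the drop-in replacement.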
def makemovie(machines, data, filename=None):
"""Plots each of the outputs, with the classes separated by colors.
"""
if not filename:
choose_matplotlib_iteractive_backend()
else:
import matplotlib
if not hasattr(matplotlib, 'backends'): matplotlib.use('Agg')
import matplotlib.pyplot as mpl
output = None
orows = 0
ocols = 0
if not filename: #start interactive plot animation
mpl.ion()
else:
# test output size
processed = process_data(machines[0], data)
plot(processed)
refimage = fig2bzarray(mpl.gcf())
orows = int(2*(refimage.shape[1]/2))
ocols = int(2*(refimage.shape[2]/2))
output = xbob.io.VideoWriter(filename, orows, ocols, 5) #5 Hz
print("Saving %d frames to %s" % (len(machines), filename))
for i, k in enumerate(machines):
# test output size
processed = process_data(k, data)
mpl.clf()
plot(processed)
mpl.suptitle("Fisher Iris DB / MLP Training step %d" % i)
if not filename: mpl.draw() #updates ion drawing
else:
image = fig2bzarray(mpl.gcf())
output.append(image[:,:orows,:ocols])
sys.stdout.write('.')
sys.stdout.flush()
if filename:
sys.stdout.write('\n')
sys.stdout.flush()
def main(user_input=None):
import argparse
parser = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("-t", "--steps", dest="steps", default=10, type=int,
help="how many training times to train the MLP",
metavar="INT")
parser.add_argument("-f", "--file", dest="filename", default=None,
help="write plot movie to FILE (implies non-interactiveness)",
metavar="FILE")
parser.add_argument("--self-test", action="store_true", dest="selftest",
default=False, help=argparse.SUPPRESS)
args = parser.parse_args(args=user_input)
# Loads the dataset and performs LDA
data = xbob.db.iris.data()
machines = create_machine(data, args.steps)
if args.selftest:
(fd, filename) = tempfile.mkstemp('.avi', 'xbobtest_')
os.close(fd)
os.unlink(filename)
makemovie(machines, data, filename)
os.unlink(filename)
else:
makemovie(machines, data, args.filename)
return 0
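# The example can also be driven programmatically: main() forwards its argument
# list straight to argparse, so the options defined above apply. A minimal
# sketch (the import path and output filename are hypothetical; the package
# tests import this module as 'iris_backprop'):
#
#   from iris_backprop import main
#   main(['--steps', '50', '--file', 'mlp-training.avi'])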
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Mon Jun 27 17:17:01 2011 +0200
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""This example shows how to use the Iris Flower (Fisher's) Dataset to create
a 3-class classifier based on Linear Discriminant Analysis (LDA).
Note: This example considers all 3 classes for the LDA training. This is
*not* what Fisher did in his paper entitled "The Use of Multiple Measurements
in Taxonomic Problems", Annals of Eugenics, pp. 179-188, 1936. In that work
Fisher does the "right" thing only for the first 2-class problem (setosa x
versicolor). You can reproduce the 2-class LDA with bob's LDA training
system without problems. When introducing the virginica class, Fisher opts for
a different metric (4vi + ve - 5se) and solves for lambda using the matrices
in the last row of Table VIII.
This is fine, but does not generalize the method proposed at the beginning of
his paper. Be aware that results achieved by the generalized LDA method will
not match Fisher's results in that last table. That being said, the final
histogram presented in that paper looks quite similar to the one produced by
this script, showing that Fisher's solution was approximately correct.
"""
import os
import sys
import numpy
import xbob.db
import xbob.trainer
import optparse
import tempfile #for package tests
def choose_matplotlib_iteractive_backend():
"""Little logic to get interactive plotting right on OSX and Linux"""
import platform
import matplotlib
if platform.system().lower() == 'darwin': #we are on OSX
matplotlib.use('macosx')
else:
matplotlib.use('GTKAgg')
def create_machine(data):
"""Creates the machine given the training data"""
lda = xbob.trainer.FisherLDATrainer()
machine, eigenValues = lda.train(data.values())
return machine
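# A minimal sketch of the 2-class reproduction mentioned in the module
# docstring (setosa vs. versicolor), assuming the same FisherLDATrainer API
# used in create_machine() above:
#
#   data = xbob.db.iris.data()
#   two_class = [data['setosa'], data['versicolor']]
#   machine2, eigen_values = xbob.trainer.FisherLDATrainer().train(two_class)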
def process_data(machine, data):
"""Iterates over classes and passes data through the trained machine"""
output = {}
for cl in data.keys():
output[cl]=machine.forward(data[cl])
return output
def plotting(output, filename=None):
"""Cherry picks the first variable (most discriminant) and reproduces the
histogram plot Fisher has on this paper, last page.
"""
if not filename: choose_matplotlib_iteractive_backend()
else:
import matplotlib
if not hasattr(matplotlib, 'backends'): matplotlib.use('Agg')
import matplotlib.pyplot as mpl
histo = {}
for k in output.keys():
histo[k] = numpy.vstack(output[k])[:,0]
# Plots the class histograms
mpl.hist(histo['setosa'], bins=8, color='green', label='Setosa', alpha=0.5)
mpl.hist(histo['versicolor'], bins=8, color='blue', label='Versicolor',
alpha=0.5)
mpl.hist(histo['virginica'], bins=8, color='red', label='Virginica',
alpha=0.5)
mpl.legend()
mpl.grid(True)
mpl.axis([-3,+3,0,20])
mpl.title("Iris Plants / 1st. LDA component")
mpl.xlabel("LDA[0]")
mpl.ylabel("Count")
if filename: #running in a non-interactive way, save the file
mpl.savefig(filename)
else: #running in an interactive way, show the plot @ the user screen
mpl.show()
def main(user_input=None):
import argparse
parser = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("-f", "--file", dest="filename", default=None,
help="write plots to FILE (implies non-interactiveness)",
metavar="FILE")
parser.add_argument("--self-test", action="store_true", dest="selftest",
default=False, help=argparse.SUPPRESS)
args = parser.parse_args(args=user_input)
# Loads the dataset and performs LDA
data = xbob.db.iris.data()
machine = create_machine(data)
output = process_data(machine, data)
if args.selftest:
(fd, filename) = tempfile.mkstemp('.pdf', 'xbobtest_')
os.close(fd)
os.unlink(filename)
plotting(output, filename)
os.unlink(filename)
else:
plotting(output, args.filename)
return 0
if __name__ == '__main__':
main()
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Tue Jul 19 11:50:08 2011 +0200
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
"""This example shows how to use the Iris Flower (Fisher's) Dataset to create
a 3-class classifier based on Neural Networks (Multi-Layer Perceptrons - MLP).
"""
from __future__ import print_function
import os
import sys
import xbob.io
import xbob.db
import xbob.measure
import xbob.machine
import xbob.trainer
import tempfile #for package tests
import numpy
def choose_matplotlib_iteractive_backend():
"""Little logic to get interactive plotting right on OSX and Linux"""
import platform
import matplotlib
if platform.system().lower() == 'darwin': #we are on OSX
matplotlib.use('macosx')
else:
matplotlib.use('GTKAgg')
def generate_testdata(data, target):
"""Concatenates all data in a single 2D array. Examples are encoded row-wise,
features, column-wise. The same for the targets.
"""
destsize = 0
for d in data: destsize += len(d)
retval = numpy.zeros((destsize, 4), 'float64')
t_retval = numpy.zeros((destsize, target[0].shape[0]), 'float64')
retval.fill(0)
cur = 0
for k, d in enumerate(data):
retval[cur:(cur+len(d)),:] = numpy.vstack(d)
for i in range(len(d)):
t_retval[i+cur,:] = target[k]
cur += len(d)
return retval, t_retval
def create_machine(data, training_steps):
"""Creates the machine given the training data"""
mlp = xbob.machine.MLP((4, 4, len(data)))
mlp.hidden_activation = xbob.machine.HyperbolicTangentActivation()
mlp.output_activation = xbob.machine.HyperbolicTangentActivation()
mlp.randomize() #reset weights and biases to a value between -0.1 and +0.1
BATCH = 50
trainer = xbob.trainer.MLPRPropTrainer(BATCH, xbob.trainer.SquareError(mlp.output_activation), mlp)
trainer.trainBiases = True #this is the default, but just to clarify!
targets = [ #we choose the approximate Fisher response!
numpy.array([+1., -1., -1.]), #setosa
numpy.array([-1., +1., -1.]), #versicolor
numpy.array([-1., -1., +1.]), #virginica
]
# Associate the data with the targets by setting the arrayset order explicitly
datalist = [data['setosa'], data['versicolor'], data['virginica']]
# All data, as 2 x 2D arrays containing data and targets
AllData, AllTargets = generate_testdata(datalist, targets)
# A helper to select and shuffle the data
S = xbob.trainer.DataShuffler(datalist, targets)
# We now iterate for several steps, looking for convergence
retval = [xbob.machine.MLP(mlp)]
for k in range(training_steps):
input, target = S(BATCH)
# We use "train_" which is unchecked and faster. Use train() if you want
# checks! See the MLPRPropTrainer documentation for details on this before
# choosing the wrong approach.
trainer.train_(mlp, input, target)
print("|RMSE| @%d:" % (k,), end=' ')
print(numpy.linalg.norm(xbob.measure.rmse(mlp(AllData), AllTargets)))
retval.append(xbob.machine.MLP(mlp))
return retval #all machines => nice plotting!
def process_data(machine, data):
"""Iterates over classes and passes data through the trained machine"""
output = {}
for cl in data.keys():
output[cl]=machine.forward(data[cl])
return output
def plot(output):
"""Plots each of the outputs, with the classes separated by colors.
"""
import matplotlib.pyplot as mpl
histo = [{}, {}, {}]
for k in output.keys():
for i in range(len(histo)):
histo[i][k] = numpy.vstack(output[k])[:,i]
order = ['setosa', 'versicolor', 'virginica']
color = ['green', 'blue', 'red']
FAR = []
FRR = []
THRES = []
# Calculates separability
for i, O in enumerate(order):
positives = histo[i][O].copy() #make it C-style contiguous
negatives = numpy.hstack([histo[i][k] for k in order if k != O])
# note: threshold a posteriori! (don't do this at home, kids ;-)
thres = xbob.measure.eer_threshold(negatives, positives)
far, frr = xbob.measure.farfrr(negatives, positives, thres)
FAR.append(far)
FRR.append(frr)
THRES.append(thres)
# Plots the class histograms
plot_counter = 0
for O, C in zip(order, color):
for k in range(len(histo)):
plot_counter += 1
mpl.subplot(len(histo), len(order), plot_counter)
mpl.hist(histo[k][O], bins=20, color=C, range=(-1,+1), label=O.capitalize(), alpha=0.5)
mpl.vlines((THRES[k],), 0, 60, colors=('red',), linestyles=('--',))
mpl.axis([-1.1,+1.1,0,60])
mpl.grid(True)
if k == 0: mpl.ylabel("Data %s" % O.capitalize())
if O == order[-1]: mpl.xlabel("Output %s" % order[k].capitalize())
if O == order[0]: mpl.title("EER = %.1f%%" % (100*(FAR[k] + FRR[k])/2))
def fig2bzarray(fig):
"""
@brief Convert a Matplotlib figure to a 3D blitz array with RGB channels and
return it
@param fig a matplotlib figure
@return a blitz 3D array of RGB values
"""
import numpy
# draw the renderer
fig.canvas.draw()
# Get the RGB buffer from the figure, re-shape it adequately
w,h = fig.canvas.get_width_height()
buf = numpy.fromstring(fig.canvas.tostring_rgb(),dtype=numpy.uint8)
buf.shape = (h,w,3)
buf = numpy.transpose(buf, (2,0,1))
return numpy.array(buf)
def makemovie(machines, data, filename=None):
"""Plots each of the outputs, with the classes separated by colors.
"""
if not filename:
choose_matplotlib_iteractive_backend()
else:
import matplotlib
if not hasattr(matplotlib, 'backends'): matplotlib.use('Agg')
import matplotlib.pyplot as mpl
output = None
orows = 0
ocols = 0
if not filename: #start interactive plot animation
mpl.ion()
else:
# test output size
processed = process_data(machines[0], data)
plot(processed)
refimage = fig2bzarray(mpl.gcf())
orows = int(2*(refimage.shape[1]/2))
ocols = int(2*(refimage.shape[2]/2))
output = xbob.io.VideoWriter(filename, orows, ocols, 5) #5 Hz
print("Saving %d frames to %s" % (len(machines), filename))
for i, k in enumerate(machines):
# test output size
processed = process_data(k, data)
mpl.clf()
plot(processed)
mpl.suptitle("Fisher Iris DB / MLP Training step %d" % i)
if not filename: mpl.draw() #updates ion drawing
else:
image = fig2bzarray(mpl.gcf())
output.append(image[:,:orows,:ocols])
sys.stdout.write('.')
sys.stdout.flush()
if filename:
sys.stdout.write('\n')
sys.stdout.flush()
def main(user_input=None):
import argparse
parser = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("-t", "--steps", dest="steps", default=10, type=int,
help="how many training times to train the MLP",
metavar="INT")
parser.add_argument("-f", "--file", dest="filename", default=None,
help="write plot movie to FILE (implies non-interactiveness)",
metavar="FILE")
parser.add_argument("--self-test",
action="store_true", dest="selftest", default=False,
help=optparse.SUPPRESS_HELP)
args = parser.parse_args(args=user_input)
# Loads the dataset and performs LDA
data = xbob.db.iris.data()
machines = create_machine(data, args.steps)
if args.selftest:
(fd, filename) = tempfile.mkstemp('.avi', 'xbobtest_')
os.close(fd)
os.unlink(filename)
makemovie(machines, data, filename)
os.unlink(filename)
else:
makemovie(machines, data, args.filename)
return 0
if __name__ == '__main__':
main()
#!/usr/bin/env python
# Andre Anjos <andre.anjos@idiap.ch>
# Thu 23 Jun 20:22:28 2011 CEST
# vim: set fileencoding=utf-8 :
"""
The Iris flower data set or Fisher's Iris data set is a multivariate data
set introduced by Sir Ronald Aylmer Fisher (1936) as an example of
discriminant analysis. It is sometimes called Anderson's Iris data set
because Edgar Anderson collected the data to quantify the geographic
variation of Iris flowers in the Gaspé Peninsula.
For more information: http://en.wikipedia.org/wiki/Iris_flower_data_set
References:
1. Fisher,R.A. "The use of multiple measurements in taxonomic problems",
Annals of Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to
Mathematical Statistics" (John Wiley, NY, 1950).
2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
(Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
Structure and Classification Rule for Recognition in Partially Exposed
Environments". IEEE Transactions on Pattern Analysis and Machine
Intelligence, Vol. PAMI-2, No. 1, 67-71.
4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
Transactions on Information Theory, May 1972, 431-433.
"""
import os
import sys
import numpy
from . import driver #driver interface
names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
"""Names of the features for each entry in the dataset."""
stats = {
'Sepal Length': [4.3, 7.9, 5.84, 0.83, 0.7826],
'Sepal Width': [2.0, 4.4, 3.05, 0.43, -0.4194],
'Petal Length': [1.0, 6.9, 3.76, 1.76, 0.9490], #high correlation
'Petal Width': [0.1, 2.5, 1.20, 0.76, 0.9565], #high correlation
}
"""These are basic statistics for each of the features in the whole dataset."""
stat_names = ['Minimum', 'Maximum', 'Mean', 'Std.Dev.', 'Correlation']
"""These are the statistics available in each column of the stats variable."""
def data():
"""Loads from (text) file and returns Fisher's Iris Dataset.
This set is small and simple enough to require an SQL backend. We keep the
single file it has in text and load it on-the-fly every time this method is
called.
We return a dictionary containing the 3 classes of Iris plants catalogued in
this dataset. Each dictionary entry contains an 2D :py:class:`numpy.ndarray`
of 64-bit floats and 50 entries. Each entry is an Array with 4 features as
described by "names".
"""
from .driver import Interface
import csv
data = Interface().files()[0]
retval = {}
# The CSV file reader API changed between Python2 and Python3
open_dict = dict(mode='rb') #python2.x
if sys.version_info[0] >= 3: #python3.x
open_dict = dict(mode='rt', encoding='ascii', newline='')
with open(data, **open_dict) as csvfile:
for row in csv.reader(csvfile):
name = row[4][5:].lower() #strip the 'Iris-' prefix (e.g. 'Iris-setosa' -> 'setosa')
retval.setdefault(name, []).append([float(k) for k in row[:4]])
# Convert to a float64 2D numpy.ndarray
for key, value in retval.items():
retval[key] = numpy.array(value, dtype='float64')
return retval
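# Minimal usage sketch of data(): the returned dictionary maps class names to
# 50x4 float64 arrays.
#
#   d = data()
#   sorted(d.keys())      # ['setosa', 'versicolor', 'virginica']
#   d['setosa'].shape     # (50, 4)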
def __dump__(args):
"""Dumps the database to stdout.
Keyword arguments:
args
An argparse.Namespace object with options set. We use two of the options:
``cls`` for the class to be dumped (if None, then dump all data) and
``selftest``, which runs the internal test.
"""
d = data()
if args.cls: d = {args.cls: d[args.cls]}
output = sys.stdout
if args.selftest:
from ..utils import null
output = null()
for k, v in d.items():
for array in v:
s = ','.join(['%.1f' % array[i] for i in range(array.shape[0])] + [k])
output.write('%s\n' % (s,))
return 0
__all__ = ['names', 'stats', 'stat_names', 'data']
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.dos.anjos@gmail.com>
# Tue 14 Aug 20:34:00 2012
"""Interface definition for Bob's database driver
"""
from ..driver import Interface as AbstractInterface
class Interface(AbstractInterface):
"""Bob Manager interface for the Iris Flower Database"""
def name(self):
'''Returns a simple name for this database, w/o funny characters, spaces'''
return 'iris'
def files(self):
'''Returns a python iterable with all auxiliary files needed.
The values should be taken w.r.t. where the python file that declares the
database sits.
'''
from pkg_resources import resource_filename
raw_files = ('iris.data', 'iris.names')
return [resource_filename(__name__, k) for k in raw_files]
def version(self):
'''Returns the current version number from Bob's build'''
import pkg_resources # part of setuptools
version = pkg_resources.require('xbob.db')[0].version
return version + ' (built-in)'
def type(self):
'''Returns the type of auxiliary files you have for this database
If you return 'sqlite', then we append special actions such as 'dbshell'
to 'xbob_dbmanage.py' automatically for you. Otherwise, we don't.
If you use auxiliary text files, just return 'text'. We may provide
special services for those types in the future.
Use the special name 'builtin' if this database is an integral part of Bob.
'''
return 'builtin'
def add_commands(self, parser):
"""A few commands this database can respond to."""
from argparse import SUPPRESS
from . import __doc__ as docs
subparsers = self.setup_parser(parser, "Fisher's Iris Flower dataset", docs)
# get the "dumplist" action from a submodule
dump_message = "Dumps the database in comma-separate-value format"
dump_parser = subparsers.add_parser('dump', help=dump_message)
dump_parser.add_argument('-c', '--class', dest="cls", default='', help="if given, limits the dump to a particular subset of the data that corresponds to the given class (defaults to '%(default)s')", choices=('setosa', 'versicolor', 'virginica', ''))
dump_parser.add_argument('--self-test', dest="selftest", default=False,
action='store_true', help=SUPPRESS)
from . import __dump__
dump_parser.set_defaults(func=__dump__)
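# Hypothetical command-line usage of the 'dump' action registered above,
# through Bob's database manager script (referred to as 'xbob_dbmanage.py' in
# the type() docstring); the exact invocation is an assumption:
#
#   $ xbob_dbmanage.py iris dump --class=setosa
#   5.1,3.5,1.4,0.2,setosa
#   ...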
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.2,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.6,1.4,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
1. Title: Iris Plants Dataset
Updated Sept 21 by C.Blake - Added discrepancy information
2. Sources:
(a) Creator: R.A. Fisher
(b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
(c) Date: July, 1988
3. Past Usage:
- Publications: too many to mention!!! Here are a few.
1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
Annals of Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
to Mathematical Statistics" (John Wiley, NY, 1950).
2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
(Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
Structure and Classification Rule for Recognition in Partially Exposed
Environments". IEEE Transactions on Pattern Analysis and Machine
Intelligence, Vol. PAMI-2, No. 1, 67-71.
-- Results:
-- very low misclassification rates (0% for the setosa class)
4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
Transactions on Information Theory, May 1972, 431-433.
-- Results:
-- very low misclassification rates again
5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
conceptual clustering system finds 3 classes in the data.
4. Relevant Information:
--- This is perhaps the best known dataset to be found in the pattern
recognition literature. Fisher's paper is a classic in the field
and is referenced frequently to this day. (See Duda & Hart, for
example.) The data set contains 3 classes of 50 instances each,
where each class refers to a type of iris plant. One class is
linearly separable from the other 2; the latter are NOT linearly
separable from each other.
--- Predicted attribute: class of iris plant.
--- This is an exceedingly simple domain.
5. Number of Instances: 150 (50 in each of three classes)
6. Number of Attributes: 4 numeric, predictive attributes and the class
7. Attribute Information:
1. sepal length in cm
2. sepal width in cm
3. petal length in cm
4. petal width in cm
5. class:
-- Iris Setosa
-- Iris Versicolour
-- Iris Virginica
8. Missing Attribute Values: None
Summary Statistics:
Min Max Mean SD Class Correlation
sepal length: 4.3 7.9 5.84 0.83 0.7826
sepal width: 2.0 4.4 3.05 0.43 -0.4194
petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
9. Class Distribution: 33.3% for each of 3 classes.
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Tue 21 Aug 2012 13:20:38 CEST
"""Tests various examples for xbob.db
"""
import unittest
from ...test import utils
class ExampleTest(unittest.TestCase):
def test01_iris_lda(self):
from ..example.iris_lda import main
cmdline = ['--self-test']
self.assertEqual(main(cmdline), 0)
@utils.ffmpeg_found()
def test02_iris_backprop(self):
from ..example.iris_backprop import main
cmdline = ['--self-test']
self.assertEqual(main(cmdline), 0)
@utils.ffmpeg_found()
def test03_iris_rprop(self):
from ..example.iris_rprop import main
cmdline = ['--self-test']
self.assertEqual(main(cmdline), 0)
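# Each test simply runs the corresponding example's main() with the hidden
# '--self-test' flag and expects a zero return value, so any of them can be
# reproduced by hand once the example module is imported, e.g.:
#
#   assert main(['--self-test']) == 0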