Commit fc4b7503 authored by Jaden DIEFENBAUGH's avatar Jaden DIEFENBAUGH

init README & prefix folder

parents
.. vim: set fileencoding=utf-8 :
======================
BEAT Tutorial Prefix
======================
This package contains the prefix folder for BEAT tutorials. Run any
BEAT commands relating to the prefix in the top-level folder of this
project, next to the ``prefix`` folder.
Support
-------
In case of problems, start a discussion at our `BEAT development mailing
list`_.
.. Place your references here:
.. _beat development mailing list: https://groups.google.com/forum/#!forum/beat-devel
{
"description": "",
"groups": [
{
"inputs": {
"scores": {
"type": "system/float/1"
},
"species": {
"type": "system/text/1"
}
},
"name": "group"
}
],
"language": "python",
"parameters": {},
"results": {
"eer": {
"display": false,
"type": "float32"
},
"roc": {
"display": false,
"type": "plot/isoroc/1"
},
"scores_distribution": {
"display": false,
"type": "plot/bar/1"
}
},
"uses": {}
}
\ No newline at end of file
# You may import any python packages that will be available in the environment you will run this algorithm in
# Environments can change based on the experiment's settings
import numpy
import bob.measure
class Algorithm:
    """BEAT analyzer: accumulates per-species iris scores and, once all
    input is consumed, reports EER, ROC and score-distribution results
    evaluating the 'virginica' class against the other two species."""

    def __init__(self):
        # scores accumulated across process() calls, keyed by species name
        self.scores = {}
        # number of histogram bins for the score-distribution plot
        self.n_bins = 20

    def process(self, inputs, outputs):
        """Accumulates one (species, score) pair; on the last call,
        computes the performance figures and writes all three results.

        Always returns True, which signals BEAT to continue processing.
        """
        # accumulate the current sample's score under its species label
        species = inputs['species'].data.text
        score = inputs['scores'].data.value
        self.scores.setdefault(species, []).append(score)

        # when no more data is pending, every sample has been seen:
        # compute and emit the analysis results
        if not inputs.hasMoreData():
            # just perf info on virginica: the other two species are the
            # negative class
            negatives = numpy.array(self.scores['setosa'] + self.scores['versicolor'])
            positives = self.scores['virginica']

            threshold = bob.measure.eer_threshold(negatives, positives)
            far, frr = bob.measure.farfrr(negatives, positives, threshold)
            roc_points = bob.measure.roc(negatives, positives, 100)
            histo_pos, bin_pos = numpy.histogram(positives, self.n_bins)
            histo_neg, bin_neg = numpy.histogram(negatives, self.n_bins)

            # the EER is conventionally the mean of FAR and FRR at the EER
            # threshold (they are only approximately equal there); the
            # declared result type is float32, so cast explicitly
            eer32 = numpy.float32((far + frr) / 2.0)

            outputs.write({
                'eer': eer32,
                'scores_distribution': {
                    "data": [
                        {
                            "label": "negative scores",
                            "x": bin_neg[:-1],
                            "y": histo_neg,
                        },
                        {
                            "label": "positive scores",
                            "x": bin_pos[:-1],
                            "y": histo_pos,
                        },
                    ]
                },
                'roc': {
                    "data": [
                        {
                            "label": "roc",
                            "false_positives": roc_points[0],
                            "false_negatives": roc_points[1],
                            "number_of_positives": numpy.uint64(len(positives)),
                            "number_of_negatives": numpy.uint64(len(negatives)),
                        }
                    ]
                },
            })

        # always return True, it signals BEAT to continue processing
        return True
{
"description": "",
"groups": [
{
"inputs": {
"scores": {
"type": "system/float/1"
},
"species": {
"type": "system/text/1"
}
},
"name": "group"
}
],
"language": "python",
"parameters": {},
"results": {
"eer": {
"display": false,
"type": "float32"
},
"roc": {
"display": false,
"type": "plot/isoroc/1"
},
"scores_distribution": {
"display": false,
"type": "plot/bar/1"
}
},
"uses": {}
}
\ No newline at end of file
# You may import any python packages that will be available in the environment you will run this algorithm in
# Environments can change based on the experiment's settings
import numpy
import bob.measure
class Algorithm:
    """BEAT analyzer: accumulates per-species iris scores and, once all
    input is consumed, reports EER, ROC and score-distribution results
    evaluating the 'virginica' class against the other two species."""

    def __init__(self):
        # scores accumulated across process() calls, keyed by species name
        self.scores = {}
        # number of histogram bins for the score-distribution plot
        self.n_bins = 20

    def process(self, inputs, outputs):
        """Accumulates one (species, score) pair; on the last call,
        computes the performance figures and writes all three results.

        Always returns True, which signals BEAT to continue processing.
        """
        # accumulate the current sample's score under its species label
        species = inputs['species'].data.text
        score = inputs['scores'].data.value
        self.scores.setdefault(species, []).append(score)

        # when no more data is pending, every sample has been seen:
        # compute and emit the analysis results
        if not inputs.hasMoreData():
            # just perf info on virginica: the other two species are the
            # negative class
            negatives = numpy.array(self.scores['setosa'] + self.scores['versicolor'])
            positives = self.scores['virginica']

            threshold = bob.measure.eer_threshold(negatives, positives)
            far, frr = bob.measure.farfrr(negatives, positives, threshold)
            roc_points = bob.measure.roc(negatives, positives, 100)
            histo_pos, bin_pos = numpy.histogram(positives, self.n_bins)
            histo_neg, bin_neg = numpy.histogram(negatives, self.n_bins)

            # the declared 'eer' result type is float32, so cast explicitly
            # (the previous revision wrote a plain Python float here); the
            # EER is conventionally the mean of FAR and FRR at the EER
            # threshold
            eer32 = numpy.float32((far + frr) / 2.0)

            outputs.write({
                'eer': eer32,
                'scores_distribution': {
                    "data": [
                        {
                            "label": "negative scores",
                            "x": bin_neg[:-1],
                            "y": histo_neg,
                        },
                        {
                            "label": "positive scores",
                            "x": bin_pos[:-1],
                            "y": histo_pos,
                        },
                    ]
                },
                'roc': {
                    "data": [
                        {
                            "label": "roc",
                            "false_positives": roc_points[0],
                            "false_negatives": roc_points[1],
                            "number_of_positives": numpy.uint64(len(positives)),
                            "number_of_negatives": numpy.uint64(len(negatives)),
                        }
                    ]
                },
            })

        # always return True, it signals BEAT to continue processing
        return True
{
"description": "Rounds each measurement in an Iris dataset sample to the nearest integer",
"groups": [
{
"inputs": {
"measurements": {
"type": "system/array_1d_floats/1"
}
},
"name": "group",
"outputs": {
"measurements": {
"type": "system/array_1d_floats/1"
}
}
}
],
"language": "python",
"parameters": {},
"splittable": false,
"uses": {}
}
\ No newline at end of file
# You may import any python packages that will be available in the environment you will run this algorithm in
# Environments can change based on the experiment's settings
import numpy
class Algorithm:
    """Rounds each measurement of an iris sample to the nearest integer."""

    def __init__(self):
        # stateless: nothing to carry over between process() calls
        pass

    def process(self, inputs, outputs):
        """Reads one measurement vector, rounds every entry, and writes
        the rounded vector to the 'measurements' output.

        Always returns True, which signals BEAT to continue processing.
        """
        raw = inputs['measurements'].data.value
        rounded = list(map(round, raw))
        outputs['measurements'].write({
            'value': rounded
        })
        return True
{
"description": "",
"groups": [
{
"inputs": {
"measurements": {
"type": "system/array_1d_floats/1"
}
},
"name": "group",
"outputs": {
"scores": {
"type": "system/float/1"
}
}
},
{
"inputs": {
"lda_machine": {
"type": "tutorial/linear_machine/1"
}
},
"name": "group0"
}
],
"language": "python",
"parameters": {},
"splittable": false,
"uses": {}
}
\ No newline at end of file
# You may import any python packages that will be available in the environment you will run this algorithm in
# Environments can change based on the experiment's settings
import bob.learn.linear
def deserialize_machine(data):
    """Rebuilds a bob.learn.linear.Machine from its serialized fields.

    `data` carries the weights, biases and input normalization terms
    (subtract/divide) that were written out by the training block.
    """
    restored = bob.learn.linear.Machine(data.weights)
    restored.biases = data.biases
    restored.input_subtract = data.input_subtract
    restored.input_divide = data.input_divide
    return restored
class Algorithm:
    """Scores iris measurement vectors by projecting them through an
    LDA machine received on a separate input group."""

    def __init__(self):
        # the LDA machine is fetched lazily on the first process() call
        self.machine = None

    def process(self, inputs, outputs):
        """Projects the current measurement vector and writes its score.

        Always returns True, which signals BEAT to continue processing.
        """
        # the 'lda_machine' input sits in a separate, unsynchronized
        # group: pull it manually once and cache the deserialized machine
        if self.machine is None:
            inputs['lda_machine'].next()
            self.machine = deserialize_machine(inputs['lda_machine'].data)

        # project the measurement; keep the single component the
        # one-dimensional LDA projection yields
        sample = inputs['measurements'].data.value
        projected = self.machine(sample)
        outputs['scores'].write({
            'value': projected[0]
        })

        # always return True, it signals BEAT to continue processing
        return True
{
"description": "",
"groups": [
{
"inputs": {
"measurements": {
"type": "system/array_1d_floats/1"
},
"species": {
"type": "system/text/1"
}
},
"name": "group",
"outputs": {
"lda_machine": {
"type": "tutorial/linear_machine/1"
}
}
}
],
"language": "python",
"parameters": {},
"splittable": false,
"uses": {}
}
\ No newline at end of file
# You may import any python packages that will be available in the environment you will run this algorithm in
# Environments can change based on the experiment's settings
import bob.learn.linear
class Algorithm:
    """Trains a Fisher LDA machine on iris measurements grouped by
    species and writes out the serialized machine."""

    def __init__(self):
        # measurement vectors accumulated across process() calls,
        # keyed by species name
        self.measurements = {}

    def process(self, inputs, outputs):
        """Accumulates one (species, measurement) pair; once all input
        is consumed, trains the LDA machine and emits its parameters.

        Always returns True, which signals BEAT to continue processing.
        """
        # accumulate the current sample under its species label
        species = inputs['species'].data.text
        measurement = inputs['measurements'].data.value
        self.measurements.setdefault(species, []).append(measurement)

        # when no more data is pending, every sample has been seen: train
        if not inputs.hasMoreData():
            # sort the class labels so the order of classes fed to the
            # trainer (and thus the resulting machine) is deterministic,
            # instead of depending on dict iteration order
            arrs = [self.measurements[k] for k in sorted(self.measurements)]
            machine, eigen_values = bob.learn.linear.FisherLDATrainer().train(arrs)
            outputs['lda_machine'].write({
                'input_subtract': machine.input_subtract,
                'input_divide': machine.input_divide,
                'weights': machine.weights,
                'biases': machine.biases,
            })

        # always return True, it signals BEAT to continue processing
        return True
{
"description": "This algorithm linearizes and accumulates images into a buffer",
"groups": [
{
"inputs": {
"id": {
"type": "system/uint64/1"
},
"image": {
"type": "system/array_2d_uint8/1"
}
},
"name": "main",
"outputs": {
"projections": {
"type": "system/array_2d_floats/1"
}
}
},
{
"inputs": {
"subspace": {
"type": "tutorial/linear_machine/1"
}
}
}
],
"language": "python",
"parameters": {},
"splittable": true,
"uses": {}
}
\ No newline at end of file
import bob.learn.linear
import numpy
def linear_machine_from_data(data):
    """Reconstructs a bob.learn.linear.Machine from a BEAT Data object.

    The BEAT object provides the serialized weights, biases and the
    input subtraction/division normalization factors.
    """
    rebuilt = bob.learn.linear.Machine(data.weights)
    rebuilt.biases = data.biases
    rebuilt.input_subtract = data.input_subtract
    rebuilt.input_divide = data.input_divide
    return rebuilt
class Algorithm:
    """Linearizes images, projects them through a linear machine, and
    accumulates all projections sharing the same template id into one
    two-dimensional output."""

    def __init__(self):
        # lazily-fetched linear transformation
        self.machine = None
        # projections collected for the template currently being built
        self.projections = []

    def process(self, inputs, outputs):
        # the 'subspace' input lives in its own group: fetch it once
        if self.machine is None:
            inputs['subspace'].next()
            self.machine = linear_machine_from_data(inputs['subspace'].data)

        # flatten the image into a float vector and project it
        flattened = inputs['image'].data.value.astype('float64').flatten()
        self.projections.append(self.machine.forward(flattened))

        # when the last image of the current template has arrived, emit
        # the stacked projections and reset the accumulator
        if inputs["id"].isDataUnitDone():
            stacked = numpy.array(self.projections, dtype=numpy.float64)
            outputs['projections'].write({
                'value': stacked
            })
            self.projections = []

        return True
This algorithm linearizes and accumulates images into a buffer,
before applying a linear transformation (e.g. using a projection
matrix computed by principal component analysis).
The linear transformation relies on the `Bob <http://www.idiap.ch/software/bob>`_ library.
The inputs are:
* `image`: an image as a two-dimensional array of unsigned 8-bit integers
  (linearized and converted to 64-bit floats internally)
* `id`: an identifier which is used as follows: all images with the
same identifier are accumulated into the same buffer
* `subspace`: a linear transformation as a collection of weights,
biases, input subtraction and input division factors.
The output `projections` is a two-dimensional array of floats (64 bits),
the number of rows corresponding to the number of accumulated images
(with the same identifier), and the number of columns to the output
dimensionality after applying the linear transformation.
\ No newline at end of file
{
"description": "This algorithm generates comparison scores",
"groups": [
{
"inputs": {
"comparison_ids": {
"type": "system/array_1d_uint64/1"
},
"probe_client_id": {
"type": "system/uint64/1"
},
"probe_id": {
"type": "system/uint64/1"
},
"probe_projections": {
"type": "system/array_2d_floats/1"
}
},
"name": "probes",
"outputs": {
"scores": {
"type": "tutorial/probe_scores/1"
}
}
},
{
"inputs": {
"template_client_id": {
"type": "system/uint64/1"
},
"template_id": {
"type": "system/uint64/1"
},
"template_projections": {
"type": "system/array_2d_floats/1"