Commit 3f237d78 authored by Manuel Günther

Added function to retrieve BIC pairs between different factors; implemented...

Added function to retrieve BIC pairs between different factors; implemented BIC pairs as list comprehension
parent 2919ca2e
......@@ -10,36 +10,87 @@ def bic_intra_extra_pairs(training_data):
This function will return two lists of tuples of data, where the first list contains tuples of the same class, while the second list contains tuples of different classes.
These tuples can be used to compute difference vectors, which then can be fed into the :py:meth:`bob.learn.linear.BICTrainer.train` method.
Note that in general many more ``extra_pairs?`` than ``intra_pairs`` are returned.
Note that in general many more ``extra_pairs`` than ``intra_pairs`` are returned.
**Keyword parameters**
training_data : [[array_like]]
training_data : [[object]]
The training data, where the data for each class are enclosed in one list.
**Return values**
intra_pairs : [(array_like, array_like)]
intra_pairs : [(object, object)]
A list of tuples of data, where both data belong to the same class, where each data element is a reference to one element of the given ``training_data``.
extra_pairs : [(array_like, array_like)]
extra_pairs : [(object, object)]
A list of tuples of data, where both data belong to different classes, where each data element is a reference to one element of the given ``training_data``.
"""
# generate intra-class pairs
intra_pairs = []
for clazz in range(len(training_data)):
for c1 in range(len(training_data[clazz])-1):
for c2 in range (c1+1, len(training_data[clazz])):
intra_pairs.append((training_data[clazz][c1], training_data[clazz][c2]))
intra_pairs = [(training_data[clazz][c1], training_data[clazz][c2]) \
for clazz in range(len(training_data)) \
for c1 in range(len(training_data[clazz])-1) \
for c2 in range (c1+1, len(training_data[clazz]))
]
# generate extra-class pairs
extra_pairs = [(training_data[clazz1][c1], training_data[clazz2][c2]) \
for clazz1 in range(len(training_data)-1) \
for c1 in range(len(training_data[clazz1])) \
for clazz2 in range(clazz1+1, len(training_data)) \
for c2 in range(len(training_data[clazz2])) \
if clazz1 != clazz2
]
# return a tuple of pairs
return (intra_pairs, extra_pairs)
def bic_intra_extra_pairs_between_factors(first_factor, second_factor):
    """bic_intra_extra_pairs_between_factors(first_factor, second_factor) -> intra_pairs, extra_pairs

    Computes intra-class and extra-class pairs from given training data, where only pairs between the first and second factors are considered.
    Both ``first_factor`` and ``second_factor`` should be aligned in a list of sub-lists, where corresponding sub-list contains the data of one class.
    Both lists need to contain the same classes in the same order; empty classes (empty lists) are allowed.

    This function will return two lists of tuples of data, where the first list contains tuples of the same class, while the second list contains tuples of different classes.
    In every tuple, the first element is taken from ``first_factor`` and the second element from ``second_factor``.
    These tuples can be used to compute difference vectors, which then can be fed into the :py:meth:`bob.learn.linear.BICTrainer.train` method.

    Note that in general many more ``extra_pairs`` than ``intra_pairs`` are returned.

    **Keyword parameters**

    first_factor : [[object]]
      The training data for the first factor, where the data for each class are enclosed in one list.

    second_factor : [[object]]
      The training data for the second factor, where the data for each class are enclosed in one list.
      Must have the same size as ``first_factor``.

    **Return values**

    intra_pairs : [(object, object)]
      A list of tuples of data, where both data belong to the same class, but different factors.

    extra_pairs : [(object, object)]
      A list of tuples of data, where both data belong to different classes and different factors.

    **Raises**

    AssertionError
      If ``first_factor`` and ``second_factor`` do not contain the same number of classes.
    """
    assert len(first_factor) == len(second_factor), "The data for both factors must contain the same number of classes"

    # generate intra-class pairs: every element of a class in the first factor
    # is combined with every element of the same class in the second factor
    intra_pairs = [
        (c1, c2)
        for class1, class2 in zip(first_factor, second_factor)
        for c1 in class1
        for c2 in class2
    ]

    # generate extra-class pairs: unlike the single-factor case, both orders of
    # class indices are needed here, since (first[i], second[j]) and
    # (first[j], second[i]) are distinct pairs; the class filter is applied
    # before the innermost loop so that same-class data is never iterated
    extra_pairs = [
        (c1, c2)
        for clazz1, class1 in enumerate(first_factor)
        for c1 in class1
        for clazz2, class2 in enumerate(second_factor)
        if clazz1 != clazz2
        for c2 in class2
    ]

    # return a tuple of pairs
    return (intra_pairs, extra_pairs)
......@@ -128,3 +128,30 @@ def test_bic_split():
for v2 in c2:
# check that exactly one of the two possible pairs is inside
assert ((v1,v2) in extra_pairs) != ((v2,v1) in extra_pairs)
def test_bic_split_between_factors():
    """Exercise ``bic_intra_extra_pairs_between_factors`` on two small, aligned factors."""
    # three classes with three samples each; first-factor values are all below 10,
    # second-factor values are all above 10, so the origin of each element is visible
    first = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    second = [[11, 12, 13], [14, 15, 16], [17, 18, 19]]

    intra_pairs, extra_pairs = bob.learn.linear.bic_intra_extra_pairs_between_factors(first, second)

    # 3 classes * 3 * 3 same-class pairs, and 6 ordered class combinations * 3 * 3 cross-class pairs
    assert len(intra_pairs) == 27
    assert len(extra_pairs) == 54

    # every pair must be (element of factor 1, element of factor 2), in that order
    assert all(
        left < 10 and right > 10
        for pair_list in (intra_pairs, extra_pairs)
        for left, right in pair_list
    )

    # every same-class combination must show up among the intra pairs
    for index in range(3):
        for left in first[index]:
            for right in second[index]:
                assert (left, right) in intra_pairs

    # every cross-class combination must show up among the extra pairs
    for index1 in range(3):
        for index2 in range(3):
            if index1 == index2:
                continue
            for left in first[index1]:
                for right in second[index2]:
                    assert (left, right) in extra_pairs
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment