Commit 4698589e authored by Amir MOHAMMADI

Remove numpy.hstack.

Make two copies of the training data to make the code simpler.
Add options for overriding read and write methods in callables.
Remove confusing returns; make the parallel ones generators.
parent 4151b396
Pipeline #13319 passed with stages in 13 minutes and 56 seconds
@@ -42,20 +42,13 @@ class MultipleExtractor(Extractor):
         # when e here wants it flat but the data is split
         else:
             # make training_data flat
-            training_data_len = [len(datalist) for datalist in training_data]
-            training_data = [d for datalist in training_data for d in datalist]
-            e.train(training_data, extractor_file)
+            aligned_training_data = [d for datalist in training_data for d in
+                                     datalist]
+            e.train(aligned_training_data, extractor_file)
         if not apply:
             return
         # split training data
-        new_training_data, i = [], 0
-        for length in training_data_len:
-            class_data = []
-            for _ in range(length):
-                class_data.append(e(training_data[i]))
-                i += 1
-            new_training_data.append(class_data)
-        training_data = new_training_data
+        training_data = [[e(d) for d in datalist]
+                         for datalist in training_data]
         return training_data

     def load(self, extractor_file):
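For illustration, the flatten-and-reapply pattern introduced above behaves as in this minimal sketch on plain lists (extract is a hypothetical stand-in for the trained extractor e):

# training_data is split by client: one inner list per client
training_data = [[1, 2], [3, 4, 5]]

# flatten it for an extractor that wants the training data flat
aligned_training_data = [d for datalist in training_data for d in datalist]
assert aligned_training_data == [1, 2, 3, 4, 5]

# after training, apply the extractor while keeping the per-client grouping
def extract(d):  # hypothetical stand-in for e(d)
    return d * 10

training_data = [[extract(d) for d in datalist]
                 for datalist in training_data]
assert training_data == [[10, 20], [30, 40, 50]]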
@@ -96,7 +89,7 @@ class SequentialExtractor(SequentialProcessor, MultipleExtractor):
         return self.processors[-1].read_feature(feature_file)

     def write_feature(self, feature, feature_file):
-        return self.processors[-1].write_feature(feature, feature_file)
+        self.processors[-1].write_feature(feature, feature_file)


 class ParallelExtractor(ParallelProcessor, MultipleExtractor):
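As a usage sketch of the delegation above (mirroring the tests further down; the import path bob.bio.base.extractor is taken from the test file), a SequentialExtractor chains the wrapped extractors and hands feature I/O to the last one:

from functools import partial

import numpy as np

from bob.bio.base.extractor import CallableExtractor, SequentialExtractor

# data flows through np.power(2, .) and then np.mean;
# read_feature/write_feature are delegated to the last wrapped extractor
extractor = SequentialExtractor(
    [CallableExtractor(partial(np.power, 2)), CallableExtractor(np.mean)])
feature = extractor([0, 1, 2, 3, 4])  # np.mean of np.power(2, data)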
@@ -134,11 +127,24 @@ class CallableExtractor(Extractor):
     callable : object
         Anything that is callable. It will be used as an extractor in
         bob.bio.base.
+    read_feature : object
+        A callable object with the signature of
+        ``feature = read_feature(feature_file)``. If not provided, the default
+        implementation handles numpy arrays.
+    write_feature : object
+        A callable object with the signature of
+        ``write_feature(feature, feature_file)``. If not provided, the default
+        implementation handles numpy arrays.
     """

-    def __init__(self, callable, **kwargs):
+    def __init__(self, callable, write_feature=None, read_feature=None,
+                 **kwargs):
         super(CallableExtractor, self).__init__(**kwargs)
         self.callable = callable
+        if write_feature is not None:
+            self.write_feature = write_feature
+        if read_feature is not None:
+            self.read_feature = read_feature

     def __call__(self, data):
         return self.callable(data)
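A sketch of the new read_feature/write_feature hooks, assuming feature_file is a plain file path; the pickle-based helpers are illustrative and not part of bob.bio.base:

import pickle

import numpy as np

from bob.bio.base.extractor import CallableExtractor

def pickle_write(feature, feature_file):
    # hypothetical writer used instead of the default numpy-array I/O
    with open(feature_file, 'wb') as f:
        pickle.dump(feature, f)

def pickle_read(feature_file):
    # hypothetical reader matching pickle_write
    with open(feature_file, 'rb') as f:
        return pickle.load(f)

extractor = CallableExtractor(np.mean,
                              write_feature=pickle_write,
                              read_feature=pickle_read)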
@@ -26,7 +26,7 @@ class SequentialPreprocessor(SequentialProcessor, Preprocessor):
         return self.processors[-1].read_data(data_file)

     def write_data(self, data, data_file):
-        return self.processors[-1].write_data(data, data_file)
+        self.processors[-1].write_data(data, data_file)


 class ParallelPreprocessor(ParallelProcessor, Preprocessor):
@@ -57,14 +57,27 @@ class CallablePreprocessor(Preprocessor):
     callable : object
         Anything that is callable. It will be used as a preprocessor in
         bob.bio.base.
+    read_data : object
+        A callable object with the signature of
+        ``data = read_data(data_file)``. If not provided, the default
+        implementation handles numpy arrays.
+    write_data : object
+        A callable object with the signature of
+        ``write_data(data, data_file)``. If not provided, the default
+        implementation handles numpy arrays.
     """

-    def __init__(self, callable, accepts_annotations=True, **kwargs):
+    def __init__(self, callable, accepts_annotations=True, write_data=None,
+                 read_data=None, **kwargs):
         super(CallablePreprocessor, self).__init__(
             callable=callable, accepts_annotations=accepts_annotations,
             **kwargs)
         self.callable = callable
         self.accepts_annotations = accepts_annotations
+        if write_data is not None:
+            self.write_data = write_data
+        if read_data is not None:
+            self.read_data = read_data

     def __call__(self, data, annotations):
         if self.accepts_annotations:
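A small sketch of the call convention shown above (assuming CallablePreprocessor is importable from bob.bio.base.preprocessor): with accepts_annotations=False the wrapped callable is invoked as callable(data), otherwise as callable(data, annotations).

from functools import partial

import numpy as np

from bob.bio.base.preprocessor import CallablePreprocessor

preprocessor = CallablePreprocessor(partial(np.power, 2),
                                    accepts_annotations=False)
# the annotations argument is still passed but ignored by the wrapped callable
preprocessed = preprocessor([0, 1, 2, 3, 4], None)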
@@ -10,7 +10,7 @@ from bob.bio.base.extractor import (
 DATA = [0, 1, 2, 3, 4]
 PROCESSORS = [partial(np.power, 2), np.mean]
 SEQ_DATA = PROCESSORS[1](PROCESSORS[0](DATA))
-PAR_DATA = np.hstack([PROCESSORS[0](DATA), PROCESSORS[1](DATA)])
+PAR_DATA = (PROCESSORS[0](DATA), PROCESSORS[1](DATA))


 def test_processors():
@@ -20,7 +20,7 @@ def test_processors():

     proc = ParallelProcessor(PROCESSORS)
     data = proc(DATA)
-    assert np.allclose(data, PAR_DATA)
+    assert all(np.allclose(x1, x2) for x1, x2 in zip(data, PAR_DATA))


 def test_preprocessors():
@@ -31,7 +31,7 @@ def test_preprocessors():

     proc = ParallelPreprocessor(processors)
     data = proc(DATA, None)
-    assert np.allclose(data, PAR_DATA)
+    assert all(np.allclose(x1, x2) for x1, x2 in zip(data, PAR_DATA))


 def test_extractors():
@@ -42,4 +42,4 @@ def test_extractors():

     proc = ParallelExtractor(processors)
     data = proc(DATA)
-    assert np.allclose(data, PAR_DATA)
+    assert all(np.allclose(x1, x2) for x1, x2 in zip(data, PAR_DATA))
-import numpy

 class SequentialProcessor(object):
     """A helper class which takes several processors and applies them one by
     one sequentially
@@ -38,24 +35,21 @@ class ParallelProcessor(object):
 class ParallelProcessor(object):
     """A helper class which takes several processors and applies them on each
-    processor separately and outputs a list of their outputs in the end.
+    processor separately and yields their outputs one by one.

     Attributes
     ----------
     processors : list
         A list of processors to apply.
-    stack : bool
-        If True (default), :any:`numpy.hstack` is called on the list of outputs
     """

-    def __init__(self, processors, stack=True, **kwargs):
+    def __init__(self, processors, **kwargs):
         super(ParallelProcessor, self).__init__()
         self.processors = processors
-        self.stack = stack

     def __call__(self, data, **kwargs):
-        """Applies the processors on the data independently and outputs a list of
-        their outputs.
+        """Applies the processors on the data independently and outputs a
+        generator of their outputs.

         Parameters
         ----------
@@ -64,15 +58,10 @@ class ParallelProcessor(object):
         **kwargs
             Any kwargs are passed to the processors.

-        Returns
-        -------
+        Yields
+        ------
         object
-            The processed data.
+            The processed data from processors one by one.
         """
-        output = []
         for processor in self.processors:
-            out = processor(data, **kwargs)
-            output.append(out)
-        if self.stack:
-            output = numpy.hstack(output)
-        return output
+            yield processor(data, **kwargs)
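Since ParallelProcessor now returns a generator, callers that relied on stack=True can stack the outputs themselves; a minimal sketch mirroring the tests above (the import path of ParallelProcessor is an assumption):

from functools import partial

import numpy as np

from bob.bio.base.utils.processors import ParallelProcessor  # assumed path

proc = ParallelProcessor([partial(np.power, 2), np.mean])
outputs = proc([0, 1, 2, 3, 4])  # a generator, evaluated lazily
results = list(outputs)          # [array([ 1,  2,  4,  8, 16]), 2.0]
stacked = np.hstack(results)     # reproduces the former stack=True output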