diff --git a/bob/bio/base/extractor/stacks.py b/bob/bio/base/extractor/stacks.py index 0f85207a9fbdee3b9d89ea1609f0e5996ca57096..b34696d0fca216e56646ab945b58afacdb5a29e1 100644 --- a/bob/bio/base/extractor/stacks.py +++ b/bob/bio/base/extractor/stacks.py @@ -42,20 +42,13 @@ class MultipleExtractor(Extractor): # when e here wants it flat but the data is split else: # make training_data flat - training_data_len = [len(datalist) for datalist in training_data] - training_data = [d for datalist in training_data for d in datalist] - e.train(training_data, extractor_file) + aligned_training_data = [d for datalist in training_data for d in + datalist] + e.train(aligned_training_data, extractor_file) if not apply: return - # split training data - new_training_data, i = [], 0 - for length in training_data_len: - class_data = [] - for _ in range(length): - class_data.append(e(training_data[i])) - i += 1 - new_training_data.append(class_data) - training_data = new_training_data + training_data = [[e(d) for d in datalist] + for datalist in training_data] return training_data def load(self, extractor_file): @@ -96,7 +89,7 @@ class SequentialExtractor(SequentialProcessor, MultipleExtractor): return self.processors[-1].read_feature(feature_file) def write_feature(self, feature, feature_file): - return self.processors[-1].write_feature(feature, feature_file) + self.processors[-1].write_feature(feature, feature_file) class ParallelExtractor(ParallelProcessor, MultipleExtractor): @@ -134,11 +127,24 @@ class CallableExtractor(Extractor): callable : object Anything that is callable. It will be used as an extractor in bob.bio.base. + read_feature : object + A callable object with the signature of + ``feature = read_feature(feature_file)``. If not provided, the default + implementation handles numpy arrays. + write_feature : object + A callable object with the signature of + ``write_feature(feature, feature_file)``. If not provided, the default + implementation handles numpy arrays. """ - def __init__(self, callable, **kwargs): + def __init__(self, callable, write_feature=None, read_feature=None, + **kwargs): super(CallableExtractor, self).__init__(**kwargs) self.callable = callable + if write_feature is not None: + self.write_feature = write_feature + if read_feature is not None: + self.read_feature = read_feature def __call__(self, data): return self.callable(data) diff --git a/bob/bio/base/preprocessor/stacks.py b/bob/bio/base/preprocessor/stacks.py index 1a4b31005b7d357c4f17a605a9dd1d58e3af1405..696bd59c353de38836606a26c56d8b1b8d474de1 100644 --- a/bob/bio/base/preprocessor/stacks.py +++ b/bob/bio/base/preprocessor/stacks.py @@ -26,7 +26,7 @@ class SequentialPreprocessor(SequentialProcessor, Preprocessor): return self.processors[-1].read_data(data_file) def write_data(self, data, data_file): - return self.processors[-1].write_data(data, data_file) + self.processors[-1].write_data(data, data_file) class ParallelPreprocessor(ParallelProcessor, Preprocessor): @@ -57,14 +57,27 @@ class CallablePreprocessor(Preprocessor): callable : object Anything that is callable. It will be used as a preprocessor in bob.bio.base. + read_data : object + A callable object with the signature of + ``data = read_data(data_file)``. If not provided, the default + implementation handles numpy arrays. + write_data : object + A callable object with the signature of + ``write_data(data, data_file)``. If not provided, the default + implementation handles numpy arrays. """ - def __init__(self, callable, accepts_annotations=True, **kwargs): + def __init__(self, callable, accepts_annotations=True, write_data=None, + read_data=None, **kwargs): super(CallablePreprocessor, self).__init__( callable=callable, accepts_annotations=accepts_annotations, **kwargs) self.callable = callable self.accepts_annotations = accepts_annotations + if write_data is not None: + self.write_data = write_data + if read_data is not None: + self.read_data = read_data def __call__(self, data, annotations): if self.accepts_annotations: diff --git a/bob/bio/base/test/test_stacks.py b/bob/bio/base/test/test_stacks.py index 3628902de8f39dba21d79e4a5a0498b940cd22e6..926901382af5455ee292ace46abc885582baaaa6 100644 --- a/bob/bio/base/test/test_stacks.py +++ b/bob/bio/base/test/test_stacks.py @@ -10,7 +10,7 @@ from bob.bio.base.extractor import ( DATA = [0, 1, 2, 3, 4] PROCESSORS = [partial(np.power, 2), np.mean] SEQ_DATA = PROCESSORS[1](PROCESSORS[0](DATA)) -PAR_DATA = np.hstack([PROCESSORS[0](DATA), PROCESSORS[1](DATA)]) +PAR_DATA = (PROCESSORS[0](DATA), PROCESSORS[1](DATA)) def test_processors(): @@ -20,7 +20,7 @@ def test_processors(): proc = ParallelProcessor(PROCESSORS) data = proc(DATA) - assert np.allclose(data, PAR_DATA) + assert all(np.allclose(x1, x2) for x1, x2 in zip(data, PAR_DATA)) def test_preprocessors(): @@ -31,7 +31,7 @@ def test_preprocessors(): proc = ParallelPreprocessor(processors) data = proc(DATA, None) - assert np.allclose(data, PAR_DATA) + assert all(np.allclose(x1, x2) for x1, x2 in zip(data, PAR_DATA)) def test_extractors(): @@ -42,4 +42,4 @@ def test_extractors(): proc = ParallelExtractor(processors) data = proc(DATA) - assert np.allclose(data, PAR_DATA) + assert all(np.allclose(x1, x2) for x1, x2 in zip(data, PAR_DATA)) diff --git a/bob/bio/base/utils/processors.py b/bob/bio/base/utils/processors.py index 1939d860b63bcabf6e9944c35dce30a5960a0b1d..99560bdbfff4e3521c656dd0c7dbf0a35acad342 100644 --- a/bob/bio/base/utils/processors.py +++ b/bob/bio/base/utils/processors.py @@ -1,6 +1,3 @@ -import numpy - - class SequentialProcessor(object): """A helper class which takes several processors and applies them one by one sequentially @@ -38,24 +35,21 @@ class SequentialProcessor(object): class ParallelProcessor(object): """A helper class which takes several processors and applies them on each - processor separately and outputs a list of their outputs in the end. + processor separately and yields their outputs one by one. Attributes ---------- processors : list A list of processors to apply. - stack : bool - If True (default), :any:`numpy.hstack` is called on the list of outputs """ - def __init__(self, processors, stack=True, **kwargs): + def __init__(self, processors, **kwargs): super(ParallelProcessor, self).__init__() self.processors = processors - self.stack = stack def __call__(self, data, **kwargs): - """Applies the processors on the data independently and outputs a list of - their outputs. + """Applies the processors on the data independently and outputs a + generator of their outputs. Parameters ---------- @@ -64,15 +58,10 @@ class ParallelProcessor(object): **kwargs Any kwargs are passed to the processors. - Returns - ------- + Yields + ------ object - The processed data. + The processed data from processors one by one. """ - output = [] for processor in self.processors: - out = processor(data, **kwargs) - output.append(out) - if self.stack: - output = numpy.hstack(output) - return output + yield processor(data, **kwargs)