stacks.py 5.51 KB
Newer Older
1
2
3
4
5
from ..utils.processors import SequentialProcessor, ParallelProcessor
from .Preprocessor import Preprocessor


class SequentialPreprocessor(SequentialProcessor, Preprocessor):
    """A helper class which takes several preprocessors and applies them one by
    one sequentially.

    Parameters
    ----------
    processors : list
        The preprocessors to chain, in the order they should be applied. Each
        one must expose ``min_preprocessed_file_size``; the first one must
        also expose ``read_original_data`` when no explicit
        ``read_original_data`` is given.
    read_original_data : object, optional
        The callable used to read the original data. When ``None`` (the
        default), the ``read_original_data`` of the first preprocessor is
        used.
    **kwargs
        Extra keyword arguments forwarded to the ``Preprocessor`` constructor.

    Attributes
    ----------
    processors : list
        A list of preprocessors to apply.

    Examples
    --------
    You can use this class to apply a chain of preprocessors on your data. For
    example:

    >>> import numpy as np
    >>> from functools import  partial
    >>> from bob.bio.base.preprocessor import SequentialPreprocessor, CallablePreprocessor
    >>> raw_data = np.array([[1, 2, 3], [1, 2, 3]])
    >>> seq_preprocessor = SequentialPreprocessor(
    ...     [CallablePreprocessor(f, accepts_annotations=False) for f in
    ...      [np.cast['float64'], lambda x: x / 2, partial(np.mean, axis=1)]])
    >>> seq_preprocessor(raw_data)
    array([ 1.,  1.])
    >>> np.all(seq_preprocessor(raw_data) ==
    ...        np.mean(np.cast['float64'](raw_data) / 2, axis=1))
    True
    """

    def __init__(self, processors, read_original_data=None, **kwargs):
        # The stack reports the smallest size threshold found among its
        # stages. An empty ``processors`` makes ``min`` raise ``ValueError``.
        min_preprocessed_file_size = min(
            (p.min_preprocessed_file_size for p in processors))
        # The first stage is the one that consumes the original data, so its
        # reader is the default for the whole chain.
        if read_original_data is None:
            read_original_data = processors[0].read_original_data

        SequentialProcessor.__init__(self, processors)
        # Forward the resolved reader; otherwise the value computed above is
        # discarded and the base class falls back to its own default.
        Preprocessor.__init__(
            self, read_original_data=read_original_data,
            min_preprocessed_file_size=min_preprocessed_file_size,
            **kwargs)

    def __call__(self, data, annotations=None):
        """Applies the chain on ``data`` by delegating to the parent
        implementation found through ``super()``.

        Parameters
        ----------
        data : object
            The input handed to the chain.
        annotations : object, optional
            Annotations passed along as the ``annotations`` keyword.

        Returns
        -------
        object
            Whatever the parent ``__call__`` returns.
        """
        return super(SequentialPreprocessor, self).__call__(
            data, annotations=annotations)

    def read_data(self, data_file):
        """Reads preprocessed data using the last preprocessor of the chain,
        since that stage produces the final output."""
        return self.processors[-1].read_data(data_file)

    def write_data(self, data, data_file):
        """Writes preprocessed data using the last preprocessor of the chain,
        mirroring :py:meth:`read_data`."""
        self.processors[-1].write_data(data, data_file)
52
53
54


class ParallelPreprocessor(ParallelProcessor, Preprocessor):
    """A helper class which holds several preprocessors, applies each of them
    separately on the same input and yields their outputs one by one.

    Attributes
    ----------
    processors : list
        A list of preprocessors to apply.

    Examples
    --------
    You can use this class to apply several preprocessors on your data and get
    all the results back. For example:

    >>> import numpy as np
    >>> from functools import  partial
    >>> from bob.bio.base.preprocessor import ParallelPreprocessor, CallablePreprocessor
    >>> raw_data = np.array([[1, 2, 3], [1, 2, 3]])
    >>> parallel_preprocessor = ParallelPreprocessor(
    ...     [CallablePreprocessor(f, accepts_annotations=False) for f in
    ...      [np.cast['float64'], lambda x: x / 2.0]])
    >>> list(parallel_preprocessor(raw_data))
    [array([[ 1.,  2.,  3.],
           [ 1.,  2.,  3.]]),
     array([[ 0.5,  1. ,  1.5],
           [ 0.5,  1. ,  1.5]])]

    The results may be processed further with a
    :any:`SequentialPreprocessor`:

    >>> from bob.bio.base.preprocessor import SequentialPreprocessor
    >>> total_preprocessor = SequentialPreprocessor(
    ...     [parallel_preprocessor, CallablePreprocessor(list, False),
    ...      CallablePreprocessor(partial(np.concatenate, axis=1), False)])
    >>> total_preprocessor(raw_data)
    array([[ 1. ,  2. ,  3. ,  0.5,  1. ,  1.5],
           [ 1. ,  2. ,  3. ,  0.5,  1. ,  1.5]])
    """

    def __init__(self, processors, **kwargs):
        # Collect every stage's size threshold and keep the smallest one.
        size_thresholds = [p.min_preprocessed_file_size for p in processors]
        smallest_threshold = min(size_thresholds)

        # Both parents are initialised explicitly because their constructors
        # take different arguments.
        ParallelProcessor.__init__(self, processors)
        base_options = dict(
            kwargs, min_preprocessed_file_size=smallest_threshold)
        Preprocessor.__init__(self, **base_options)

    def __call__(self, data, annotations=None):
        """Hands ``data`` and ``annotations`` to the parent implementation
        found through ``super()`` and returns its result unchanged."""
        parent = super(ParallelPreprocessor, self)
        return parent.__call__(data, annotations=annotations)
104
105
106


class CallablePreprocessor(Preprocessor):
    """A thin preprocessor that wraps an arbitrary callable and applies it to
    the input.

    Attributes
    ----------
    accepts_annotations : bool
        If False, annotations are not passed to the callable.
    callable : object
        Anything that is callable. It will be used as a preprocessor in
        bob.bio.base.
    read_data : object
        A callable object with the signature of
        ``data = read_data(data_file)``. If not provided, the default
        implementation handles numpy arrays.
    write_data : object
        A callable object with the signature of
        ``write_data(data, data_file)``. If not provided, the default
        implementation handles numpy arrays.

    Examples
    --------
    You can take any function like ``numpy.cast['float32']`` to cast your data
    to float32 for example. This is useful when you want to stack several
    preprocessors using the :any:`SequentialPreprocessor` and
    :any:`ParallelPreprocessor` classes.
    """

    def __init__(self, callable, accepts_annotations=True, write_data=None,
                 read_data=None, **kwargs):
        parent = super(CallablePreprocessor, self)
        parent.__init__(
            callable=callable,
            accepts_annotations=accepts_annotations,
            **kwargs)

        self.accepts_annotations = accepts_annotations
        self.callable = callable

        # Custom I/O hooks replace the inherited methods on this instance
        # only; when omitted, the inherited implementations stay in place.
        if read_data is not None:
            self.read_data = read_data
        if write_data is not None:
            self.write_data = write_data

    def __call__(self, data, annotations=None):
        """Applies the wrapped callable to ``data``; ``annotations`` are
        passed as a second positional argument only when
        ``accepts_annotations`` is true."""
        if not self.accepts_annotations:
            return self.callable(data)
        return self.callable(data, annotations)