stats.py 10 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

Samuel GAIST's avatar
Samuel GAIST committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################

André Anjos's avatar
André Anjos committed
36

37
38
39
40
41
42
"""
=====
stats
=====

A class that can read, validate and update statistical information
43
44
45
46

Forward imported from :py:mod:`beat.backend.python.stats`:
:py:func:`beat.backend.python.stats.io_statistics`
:py:func:`beat.backend.python.stats.update`
47
"""
André Anjos's avatar
André Anjos committed
48
import copy
Samuel GAIST's avatar
Samuel GAIST committed
49
import os
André Anjos's avatar
André Anjos committed
50

51
import simplejson as json
André Anjos's avatar
André Anjos committed
52

53
54
from beat.backend.python.stats import io_statistics  # noqa
from beat.backend.python.stats import update  # noqa
55

Samuel GAIST's avatar
Samuel GAIST committed
56
57
58
from . import prototypes
from . import schema

59

André Anjos's avatar
André Anjos committed
60
class Statistics(object):
    """Statistics define resource usage for algorithmic code runs


    Parameters:

      data (:py:class:`object`, Optional): The piece of data representing the
        statistics to be read, it must validate against our pre-defined
        execution schema. If the input is ``None`` or empty, then start a new
        statistics from scratch.


    Attributes:

      errors (list): A list of strings containing errors found while loading
        this statistics information.

    """

    def __init__(self, data=None):

        self.errors = []

        if data:
            self._load(data)  # also runs validation
        else:
            # no (or empty) input: start from the "statistics" prototype
            self._data, self.errors = prototypes.load("statistics")  # also validates

    def _load(self, data):
        """Loads the statistics

        On failure, the problems found are stored in :py:attr:`errors` and
        no exception is raised by this method itself.

        Parameters:

          data (object, str, file): The piece of data to load. The input can be
            a valid python object that represents a JSON structure, a file,
            from which the JSON contents will be read out or a string. See
            :py:func:`schema.validate` for more details.
        """

        # reset
        self._data = None
        self.errors = []

        # anything that is not a dictionary is checked as a filesystem path
        # NOTE(review): the docstring also advertises file objects, but
        # ``os.path.exists`` only accepts path-like values - confirm callers
        if not isinstance(data, dict):
            if not os.path.exists(data):
                self.errors.append("File not found: %s" % data)
                return

        # this runs basic validation, including JSON loading if required
        self._data, self.errors = schema.validate("statistics", data)
        if self.errors:
            return  # don't proceed with the rest of validation

    @property
    def schema_version(self):
        """Returns the schema version (defaults to 1 if not set)"""

        # must read the top-level dictionary: ``self.data`` is the property
        # returning only the I/O sub-section, which never holds this key
        return self._data.get("schema_version", 1)

    @property
    def cpu(self):
        """Returns only CPU information"""

        return self._data["cpu"]

    @cpu.setter
    def cpu(self, data):
        """Sets the CPU information

        Only the ``user``, ``system`` and ``total`` entries, and the
        ``voluntary``/``involuntary`` context switch counters are copied from
        ``data``; any other key is ignored.
        """

        for key in ("user", "system", "total"):
            self._data["cpu"][key] = data[key]

        for key in ("voluntary", "involuntary"):
            self._data["cpu"]["context_switches"][key] = data["context_switches"][key]

    @property
    def memory(self):
        """Returns only memory information"""

        return self._data["memory"]

    @memory.setter
    def memory(self, data):
        """Sets only the memory information (the ``rss`` entry)"""

        for key in ("rss",):
            self._data["memory"][key] = data[key]

    @property
    def data(self):
        """Returns only I/O information"""

        return self._data["data"]

    @data.setter
    def data(self, data):
        """Sets only the I/O information

        Copies the read/write counters for ``volume``, ``blocks`` and
        ``time``, the list of ``files`` and the ``network`` section.
        """

        for key in ("volume", "blocks", "time"):
            self._data["data"][key]["read"] = data[key]["read"]
            self._data["data"][key]["write"] = data[key]["write"]

        self._data["data"]["files"] = list(data["files"])

        # network information lives inside the I/O section, which is where
        # ``__iadd__`` reads it back from
        self._data["data"]["network"] = data["network"]

    @property
    def valid(self):
        """A boolean that indicates if these statistics are valid or not"""

        return not bool(self.errors)

    def __add__(self, other):
        """Adds two statistics data blocks

        Returns a new :py:class:`Statistics`, leaving both operands untouched.
        """

        # Guard here as well as in ``__iadd__``: if ``retval += other`` got
        # ``NotImplemented`` back, python would fall back to ``__add__`` and
        # recurse forever
        if not isinstance(other, Statistics):
            return NotImplemented

        retval = Statistics(copy.deepcopy(self._data))
        retval += other
        return retval

    def __iadd__(self, other):
        """Self-add statistics from another block

        Counters are summed, the list of files is extended and the memory
        ``rss`` keeps the maximum of both operands.
        """

        if not isinstance(other, Statistics):
            return NotImplemented

        for key in ("user", "system", "total"):
            self._data["cpu"][key] += other._data["cpu"][key]

        for key in ("voluntary", "involuntary"):
            self._data["cpu"]["context_switches"][key] += other._data["cpu"][
                "context_switches"
            ][key]

        for key in ("rss",):  # gets the maximum between the two
            self._data["memory"][key] = max(
                other._data["memory"][key], self._data["memory"][key]
            )

        for key in ("volume", "blocks", "time"):
            self._data["data"][key]["read"] += other._data["data"][key]["read"]
            self._data["data"][key]["write"] += other._data["data"][key]["write"]

        self._data["data"]["files"] += other._data["data"]["files"]

        self._data["data"]["network"]["wait_time"] += other._data["data"]["network"][
            "wait_time"
        ]

        return self

    def __str__(self):
        """Returns the JSON representation, indented by 2 spaces"""

        return self.as_json(2)

    def as_json(self, indent=None):
        """Returns self as JSON

        Parameters:
            :param indent int: Indentation to use for the JSON generation

        Returns:
            str: JSON representation
        """

        return json.dumps(self._data, indent=indent)

    def as_dict(self):
        """Returns self as a dictionary

        The internal dictionary itself is returned (not a copy), so changes
        made by the caller are reflected in this object.
        """

        return self._data

    def write(self, f):
        """Writes contents to a file-like object

        Parameters:

          f (file, str): Either an object with a ``write`` method, or a path
            that is opened in text mode for writing.
        """

        if hasattr(f, "write"):
            f.write(str(self))
        else:
            with open(f, "wt") as fobj:
                fobj.write(str(self))
André Anjos's avatar
André Anjos committed
238
239


240
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
241
242


243
def cpu_statistics(start, end):
    """Summarizes current CPU usage

    This method should be used when the currently set algorithm is the only one
    executed through the whole process. It is done for collecting resource
    statistics on separate processing environments. It follows the recipe in:
    http://stackoverflow.com/questions/30271942/get-docker-container-cpu-usage-as-percentage

    Parameters:

      start (dict, None): The CPU counters sampled at the beginning of the
        measured period, with the same layout as ``end``. If ``None``, the
        values in ``end`` are used as they are instead of a difference.

      end (dict): The CPU counters sampled at the end of the measured period.
        It is expected to contain ``system_cpu_usage`` and a ``cpu_usage``
        dictionary with ``total_usage`` and, optionally, ``percpu_usage``.
        If ``system_cpu_usage`` is missing, zeroed statistics are returned.

    Returns:

      dict: A dictionary summarizing current CPU usage, with the keys
      ``user``, ``system`` (always 0.0), ``total``, ``percent`` and
      ``processors``.

    """

    if "system_cpu_usage" not in end:
        return {
            "user": 0.0,
            "system": 0.0,
            "total": 0.0,
            "percent": 0.0,
            "processors": 1,
        }

    if start is not None:
        user_cpu = end["cpu_usage"]["total_usage"] - start["cpu_usage"]["total_usage"]
        total_cpu = end["system_cpu_usage"] - start["system_cpu_usage"]

    else:
        user_cpu = end["cpu_usage"]["total_usage"]
        total_cpu = end["system_cpu_usage"]

    user_cpu /= 1000000000.0  # in seconds
    total_cpu /= 1000000000.0  # in seconds

    # ``percpu_usage`` may be missing altogether (the Docker stats API
    # documents that it is not set on cgroup v2 hosts), so it is read
    # defensively. In that case use the advertised ``online_cpus`` count if
    # available, or a single processor otherwise.
    percpu_usage = end["cpu_usage"].get("percpu_usage")
    if percpu_usage is not None:
        processors = len(percpu_usage)
    else:
        processors = end.get("online_cpus") or 1

    return {
        "user": user_cpu,
        "system": 0.0,
        "total": total_cpu,
        # guard against a null system usage to avoid dividing by zero
        "percent": 100.0 * processors * user_cpu / total_cpu if total_cpu else 0.0,
        "processors": processors,
    }
André Anjos's avatar
André Anjos committed
289
290


291
# ----------------------------------------------------------
292
293


294
def memory_statistics(data):
    """Summarizes current memory usage

    This method should be used when the currently set algorithm is the only one
    executed through the whole process. It is done for collecting resource
    statistics on separate processing environments.

    Parameters:

      data (dict): The memory counters to summarize. The ``limit`` and
        ``max_usage`` entries are read and converted to floats.

    Returns:

      dict: A dictionary summarizing current memory usage, with the keys
      ``rss`` (the value of ``max_usage``), ``limit`` and ``percent`` (the
      share of ``limit`` taken by ``rss``, or 0.0 if ``limit`` is null).

    """

    ceiling = float(data["limit"])
    peak = float(data["max_usage"])

    # a null limit would otherwise trigger a division by zero
    if ceiling:
        share = 100.0 * peak / ceiling
    else:
        share = 0.0

    return dict(rss=peak, limit=ceiling, percent=share)