stats.py 10.1 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

Samuel GAIST's avatar
Samuel GAIST committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################

André Anjos's avatar
André Anjos committed
36

37
38
39
40
41
42
"""
=====
stats
=====

A class that can read, validate and update statistical information
43
44
45
46

Forward imported from :py:mod:`beat.backend.python.stats`:
:py:func:`beat.backend.python.stats.io_statistics`
:py:func:`beat.backend.python.stats.update`
47
"""
André Anjos's avatar
André Anjos committed
48
49
50
51
52


import os
import copy

53
import simplejson as json
André Anjos's avatar
André Anjos committed
54
55
56
57

from . import schema
from . import prototypes

58
59
from beat.backend.python.stats import io_statistics  # noqa
from beat.backend.python.stats import update  # noqa
60
61


André Anjos's avatar
André Anjos committed
62
class Statistics(object):
    """Statistics define resource usage for algorithmic code runs


    Parameters:

      data (:py:class:`object`, Optional): The piece of data representing the
        statistics to be read, it must validate against our pre-defined
        execution schema. If the input is ``None`` or empty, then start a new
        statistics from scratch.


    Attributes:

      errors (list): A list of strings containing errors found while loading
        this statistics information.

      data (dict): The I/O section (the ``"data"`` entry) of these statistics

    """

    def __init__(self, data=None):

        self.errors = []

        if data:
            self._load(data)  # also runs validation
        else:
            # nothing to read: start from the "statistics" prototype
            self._data, self.errors = prototypes.load("statistics")  # also validates

    def _load(self, data):
        """Loads the statistics

        On failure, :py:attr:`errors` is populated and the method returns
        early.

        Parameters:

          data (object, str, file): The piece of data to load. The input can be
            a valid python object that represents a JSON structure, a file,
            from which the JSON contents will be read out or a string. See
            :py:func:`schema.validate` for more details.
        """

        # reset
        self._data = None
        self.errors = []

        # Anything that is not a dictionary is checked as a filesystem path.
        # NOTE(review): the docstring also advertises file objects, which
        # os.path.exists() does not accept - confirm against callers.
        if not isinstance(data, dict):
            if not os.path.exists(data):
                self.errors.append("File not found: %s" % data)
                return

        # this runs basic validation, including JSON loading if required
        self._data, self.errors = schema.validate("statistics", data)
        if self.errors:
            return  # don't proceed with the rest of validation

    @property
    def schema_version(self):
        """Returns the schema version (defaults to 1 if not set)"""

        # Read from the top-level document: ``self.data`` is only the I/O
        # section and does not carry the schema version.
        return self._data.get("schema_version", 1)

    @property
    def cpu(self):
        """Returns only CPU information"""

        return self._data["cpu"]

    @cpu.setter
    def cpu(self, data):
        """Sets the CPU information

        Copies the ``user``, ``system`` and ``total`` entries, as well as the
        ``voluntary`` and ``involuntary`` context switch counters from
        ``data``.
        """

        for key in ("user", "system", "total"):
            self._data["cpu"][key] = data[key]

        for key in ("voluntary", "involuntary"):
            self._data["cpu"]["context_switches"][key] = data["context_switches"][key]

    @property
    def memory(self):
        """Returns only memory information"""

        return self._data["memory"]

    @memory.setter
    def memory(self, data):
        """Sets only the memory information (the ``rss`` entry)"""

        for key in ("rss",):
            self._data["memory"][key] = data[key]

    @property
    def data(self):
        """Returns only I/O information"""

        return self._data["data"]

    @data.setter
    def data(self, data):
        """Sets only the I/O information

        Copies the read/write counters of ``volume``, ``blocks`` and ``time``,
        the list of ``files`` and the ``network`` entry from ``data``.
        """

        for key in ("volume", "blocks", "time"):
            self._data["data"][key]["read"] = data[key]["read"]
            self._data["data"][key]["write"] = data[key]["write"]

        self._data["data"]["files"] = list(data["files"])

        # Network statistics belong to the I/O section: this is where
        # __iadd__ looks for "wait_time". Copied so that later in-place
        # accumulation does not modify the caller's dictionary.
        self._data["data"]["network"] = dict(data["network"])

    @property
    def valid(self):
        """A boolean that indicates if these statistics are valid or not"""

        return not bool(self.errors)

    def __add__(self, other):
        """Adds two statistics data blocks

        Returns a new object built from a deep copy of this object's data,
        leaving both operands untouched.
        """

        # Bail out before delegating to ``+=``: for unsupported operands
        # __iadd__ returns NotImplemented, which makes Python fall back to
        # __add__ again and would recurse forever.
        if not isinstance(other, Statistics):
            return NotImplemented

        retval = Statistics(copy.deepcopy(self._data))
        retval += other
        return retval

    def __iadd__(self, other):
        """Self-add statistics from another block

        Counters are summed, except for the memory ``rss`` for which the
        maximum of the two values is kept. File lists are concatenated.
        """

        if not isinstance(other, Statistics):
            return NotImplemented

        for key in ("user", "system", "total"):
            self._data["cpu"][key] += other._data["cpu"][key]

        for key in ("voluntary", "involuntary"):
            self._data["cpu"]["context_switches"][key] += other._data["cpu"][
                "context_switches"
            ][key]

        for key in ("rss",):  # gets the maximum between the two
            self._data["memory"][key] = max(
                other._data["memory"][key], self._data["memory"][key]
            )

        for key in ("volume", "blocks", "time"):
            self._data["data"][key]["read"] += other._data["data"][key]["read"]
            self._data["data"][key]["write"] += other._data["data"][key]["write"]

        self._data["data"]["files"] += other._data["data"]["files"]

        self._data["data"]["network"]["wait_time"] += other._data["data"]["network"][
            "wait_time"
        ]

        return self

    def __str__(self):
        """Returns the JSON representation, indented by 2 spaces"""

        return self.as_json(2)

    def as_json(self, indent=None):
        """Returns self as JSON

        Parameters:
            :param indent int: Indentation to use for the JSON generation

        Returns:
            str: JSON representation
        """

        return json.dumps(self._data, indent=indent)

    def as_dict(self):
        """Returns self as a dictionary (the internal object, not a copy)"""

        return self._data

    def write(self, f):
        """Writes contents to a file-like object

        If ``f`` has no ``write`` attribute, it is opened as a path in text
        write mode instead.
        """

        if hasattr(f, "write"):
            f.write(str(self))
        else:
            with open(f, "wt") as fobj:
                fobj.write(str(self))
André Anjos's avatar
André Anjos committed
242
243


244
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
245
246


247
def cpu_statistics(start, end):
    """Summarizes current CPU usage

    This method should be used when the currently set algorithm is the only one
    executed through the whole process. It is done for collecting resource
    statistics on separate processing environments. It follows the recipe in:
    http://stackoverflow.com/questions/30271942/get-docker-container-cpu-usage-as-percentage

    Parameters:

      start (dict): CPU counters taken at the beginning of the measured
        period, with the same layout as ``end``. If ``None``, the counters in
        ``end`` are used as they are instead of a difference.

      end (dict): CPU counters taken at the end of the measured period. It is
        expected to provide ``system_cpu_usage`` and a ``cpu_usage``
        dictionary with ``total_usage`` and, optionally, ``percpu_usage``.
        An optional ``online_cpus`` entry is used as processor count when
        ``percpu_usage`` is not available.

    Returns:

      dict: A dictionary summarizing current CPU usage

    """

    if "system_cpu_usage" not in end:
        # no usable counters: report an all-zero summary
        return {
            "user": 0.0,
            "system": 0.0,
            "total": 0.0,
            "percent": 0.0,
            "processors": 1,
        }

    if start is not None:
        user_cpu = end["cpu_usage"]["total_usage"] - start["cpu_usage"]["total_usage"]
        total_cpu = end["system_cpu_usage"] - start["system_cpu_usage"]

    else:
        user_cpu = end["cpu_usage"]["total_usage"]
        total_cpu = end["system_cpu_usage"]

    user_cpu /= 1000000000.0  # in seconds
    total_cpu /= 1000000000.0  # in seconds

    # ``percpu_usage`` may be missing altogether (not only ``None``), in
    # which case the number of online CPUs, if reported, is used instead.
    per_cpu = end["cpu_usage"].get("percpu_usage")
    if per_cpu is not None:
        processors = len(per_cpu)
    else:
        processors = end.get("online_cpus") or 1

    return {
        "user": user_cpu,
        "system": 0.0,
        "total": total_cpu,
        # guard against a division by zero when no system time elapsed
        "percent": 100.0 * processors * user_cpu / total_cpu if total_cpu else 0.0,
        "processors": processors,
    }
André Anjos's avatar
André Anjos committed
293
294


295
# ----------------------------------------------------------
296
297


298
def memory_statistics(data):
    """Summarizes current memory usage

    This method should be used when the currently set algorithm is the only one
    executed through the whole process. It is done for collecting resource
    statistics on separate processing environments.

    Parameters:

      data (dict): Memory counters, providing the ``limit`` and ``max_usage``
        entries. Both are converted to ``float``.

    Returns:

      dict: A dictionary summarizing current memory usage, with the ``rss``
      (taken from ``max_usage``), ``limit`` and ``percent`` entries

    """

    ceiling = float(data["limit"])
    peak = float(data["max_usage"])

    # a null limit would lead to a division by zero: report 0% instead
    if ceiling:
        ratio = 100.0 * peak / ceiling
    else:
        ratio = 0.0

    return dict(rss=peak, limit=ceiling, percent=ratio)