Commit b8fd9960 authored by Samuel GAIST

[backend] Pre-commit cleanup

parent d1be9948
Pipeline #40902 passed with stage
in 5 minutes and 2 seconds
......@@ -42,20 +42,19 @@ algorithm
Validation for algorithms
"""
import logging
import os
import sys
import logging
import six
import numpy
import simplejson as json
import six
from . import dataformat
from . import library
from . import loader
from . import utils
logger = logging.getLogger(__name__)
......
......@@ -43,29 +43,31 @@ Base type for all data formats
"""
import struct
import numpy
import six
# This table defines what is the binary (``struct``) format code for each
# supported basic type. Complex types reuse the float/double codes because a
# complex value is stored as two consecutive values (real, imag).
BINCODE = {
    numpy.dtype("int8"): "b",  # signed char
    numpy.dtype("int16"): "h",  # signed short
    numpy.dtype("int32"): "i",  # signed int
    numpy.dtype("int64"): "q",  # signed long long
    numpy.dtype("uint8"): "B",  # unsigned char
    numpy.dtype("uint16"): "H",  # unsigned short
    numpy.dtype("uint32"): "I",  # unsigned int
    numpy.dtype("uint64"): "Q",  # unsigned long long
    numpy.dtype("float32"): "f",  # a single float
    numpy.dtype("float64"): "d",  # a single double
    numpy.dtype("complex64"): "f",  # two floats (real, imag)
    numpy.dtype("complex128"): "d",  # two doubles (real, imag)
    numpy.dtype("bool"): "?",  # C99 Bool_
}

ENDIANNESS = "<"  # little-endian
SIZE = "Q"  # 64-bit unsigned

# ``struct`` format template for a string: a SIZE length prefix followed by
# that many bytes; the byte count is filled in with ``STRING % length``
STRING = ENDIANNESS + SIZE + "%ds"
def setup_scalar(formatname, attrname, dtype, value, casting, add_defaults):
"""Casts the value to the the scalar type defined by dtype
......@@ -98,16 +100,20 @@ def setup_scalar(formatname, attrname, dtype, value, casting, add_defaults):
"""
if hasattr(dtype, 'type') and issubclass(dtype.type, numpy.generic):
if hasattr(dtype, "type") and issubclass(dtype.type, numpy.generic):
if value is None: # use the default for the type
return dtype.type()
else:
if value: # zero is classified as int64 which can't be safely casted to uint64
if (
value
): # zero is classified as int64 which can't be safely casted to uint64
if not numpy.can_cast(numpy.array(value).dtype, dtype, casting=casting):
raise TypeError("cannot safely cast attribute `%s' on dataformat " \
"`%s' with type `%s' to `%s' without precision loss" % \
(attrname, formatname, numpy.array(value).dtype, dtype))
raise TypeError(
"cannot safely cast attribute `%s' on dataformat "
"`%s' with type `%s' to `%s' without precision loss"
% (attrname, formatname, numpy.array(value).dtype, dtype)
)
return dtype.type(value)
elif issubclass(dtype, str): # it is a string
......@@ -119,6 +125,7 @@ def setup_scalar(formatname, attrname, dtype, value, casting, add_defaults):
else: # it is a dataformat
return dtype().from_dict(value, casting=casting, add_defaults=add_defaults)
class _protected_str_ndarray(numpy.ndarray):
"""Extends :py:class:`numpy.ndarray` so that item assignment is checked
"""
......@@ -126,9 +133,11 @@ class _protected_str_ndarray(numpy.ndarray):
def __setitem__(self, key, value):
    """First checks for conformance and then assigns

    Only string objects (``six.string_types``) are accepted as items.

    Raises:

      TypeError: if ``value`` is not a string object

    """

    if not isinstance(value, six.string_types):
        raise TypeError(
            "string array requires string objects for "
            "items but you passed `%s' (%s) while setting element "
            "%s" % (value, type(value), key)
        )

    # the value conforms: defer to the regular numpy assignment
    return numpy.ndarray.__setitem__(self, key, value)
......@@ -140,14 +149,12 @@ class _protected_ndarray(numpy.ndarray):
"""First checks for conformance and then assigns"""
value_ = self._format_dtype()
value_.from_dict(
value,
casting=self._format_casting,
add_defaults=self._format_add_defaults,
)
value, casting=self._format_casting, add_defaults=self._format_add_defaults,
)
return numpy.ndarray.__setitem__(self, key, value_)
def setup_array(formatname, attrname, shape, dtype, value, casting,
add_defaults):
def setup_array(formatname, attrname, shape, dtype, value, casting, add_defaults):
"""Casts the value to the the array type defined by (shape, dtype)
......@@ -185,53 +192,57 @@ def setup_array(formatname, attrname, shape, dtype, value, casting,
"""
def is_empty(x):
    """Tells if ``x`` holds no data

    numpy arrays are checked through their ``size`` attribute, because the
    truth value of an array with more than one element is ambiguous. Any
    other object is checked through its regular truthiness.
    """

    if isinstance(x, numpy.ndarray):
        return not x.size
    return not x
if is_empty(value):
# creates an empty array that remains unchecked
if hasattr(dtype, 'type') and issubclass(dtype.type, numpy.generic):
if hasattr(dtype, "type") and issubclass(dtype.type, numpy.generic):
retval = numpy.ndarray(shape, dtype=dtype)
elif issubclass(dtype, str): #it is a string
elif issubclass(dtype, str): # it is a string
retval = numpy.ndarray(shape, dtype=object).view(_protected_str_ndarray)
retval[~retval.astype(bool)] = ''
else: #it is a dataformat
retval[~retval.astype(bool)] = ""
else: # it is a dataformat
retval = numpy.ndarray(shape, dtype=object).view(_protected_ndarray)
retval._format_dtype = dtype
retval._format_casting = 'safe'
retval._format_casting = "safe"
retval._format_add_defaults = True
else:
if hasattr(dtype, 'type'):
if hasattr(dtype, "type"):
retval = numpy.array(value, dtype=dtype)
else:
retval = numpy.array(value) #blindly converts data
retval = numpy.array(value) # blindly converts data
if retval.ndim != len(shape):
raise TypeError("input argument for array attribute `%s' on " \
"dataformat `%s' has %d dimensions and does not respect " \
"what is requested in the data format (%d dimension(s))" % (
attrname, formatname, retval.ndim, len(shape),
)
)
raise TypeError(
"input argument for array attribute `%s' on "
"dataformat `%s' has %d dimensions and does not respect "
"what is requested in the data format (%d dimension(s))"
% (attrname, formatname, retval.ndim, len(shape),)
)
for i, d in enumerate(retval.shape):
if shape[i] and shape[i] != d:
raise TypeError("input argument for array attribute `%s' on " \
"dataformat `%s' does not respect dimension " \
"restrictions for dimension `%d' as requested in the " \
"data format (%d != %d)" % (attrname, formatname, i, d, shape[i])
)
if hasattr(dtype, 'type') and issubclass(dtype.type, numpy.generic):
raise TypeError(
"input argument for array attribute `%s' on "
"dataformat `%s' does not respect dimension "
"restrictions for dimension `%d' as requested in the "
"data format (%d != %d)" % (attrname, formatname, i, d, shape[i])
)
if hasattr(dtype, "type") and issubclass(dtype.type, numpy.generic):
if not numpy.can_cast(retval.dtype, dtype, casting=casting):
raise TypeError("cannot safely cast array attribute `%s' " \
"on dataformat `%s' with type `%s' to `%s' without precision " \
"loss" % (attrname, formatname, retval.dtype, dtype))
raise TypeError(
"cannot safely cast array attribute `%s' "
"on dataformat `%s' with type `%s' to `%s' without precision "
"loss" % (attrname, formatname, retval.dtype, dtype)
)
return retval.astype(dtype)
elif issubclass(dtype, str): #it is a string
elif issubclass(dtype, str): # it is a string
return numpy.array(retval, dtype=object).view(_protected_str_ndarray)
# it is a dataformat
......@@ -242,10 +253,11 @@ def setup_array(formatname, attrname, shape, dtype, value, casting,
retval = numpy.frompyfunc(constructor, 1, 1)(retval).view(_protected_ndarray)
retval._format_dtype = dtype
retval._format_casting = 'safe'
retval._format_casting = "safe"
retval._format_add_defaults = True
return retval
def pack_array(dtype, value, fd):
"""Binary-encodes the array at ``value`` into the file descriptor ``fd``
......@@ -265,18 +277,18 @@ def pack_array(dtype, value, fd):
shape_format = ENDIANNESS + str(len(value.shape)) + SIZE
fd.write(struct.pack(shape_format, *value.shape))
if hasattr(dtype, 'type') and issubclass(dtype.type, numpy.generic):
if hasattr(dtype, "type") and issubclass(dtype.type, numpy.generic):
# N.B.: this bit of code is optimized to reduce memory usage
# if it is not C-style (row order) and memory contiguous, make a copy
value = numpy.require(value, requirements='C') # C_CONTIGUOUS
value = numpy.require(value, requirements="C") # C_CONTIGUOUS
# makes sure endianness is respected, will copy otherwise
value = value.astype(ENDIANNESS + value.dtype.str[1:], copy=False)
fd.write(value.tostring())
elif issubclass(dtype, str): # it is a string
for item in value.flat:
encoded = item.encode('utf-8')
encoded = item.encode("utf-8")
length = len(encoded)
fd.write(struct.pack(STRING % length, length, encoded))
......@@ -300,15 +312,16 @@ def pack_scalar(dtype, value, fd):
"""
if hasattr(dtype, 'type') and issubclass(dtype.type, numpy.generic):
if hasattr(dtype, "type") and issubclass(dtype.type, numpy.generic):
if issubclass(dtype.type, numpy.complexfloating):
fd.write(struct.pack(ENDIANNESS + '2' + BINCODE[dtype],
value.real, value.imag))
fd.write(
struct.pack(ENDIANNESS + "2" + BINCODE[dtype], value.real, value.imag)
)
else:
fd.write(struct.pack(ENDIANNESS + BINCODE[dtype], value))
elif issubclass(dtype, str): # it is a string
encoded = value.encode('utf-8')
encoded = value.encode("utf-8")
length = len(encoded)
fd.write(struct.pack(STRING % length, length, encoded))
......@@ -323,10 +336,10 @@ def read_some(format, fd):
def read_string(fd):
    """Reads the next string from the file descriptor ``fd``

    The string length is read first (format ``ENDIANNESS + SIZE``) and is
    then used to build the ``"<length>s"`` format of the payload.

    Returns a one-element tuple holding the string. Payloads that are not
    already ``str`` are decoded as UTF-8.
    """

    # NOTE(review): relies on ``read_some`` returning a tuple of unpacked
    # values (``struct.unpack`` style) -- confirm against its definition
    string_format = "%ds" % read_some(ENDIANNESS + SIZE, fd)
    retval = read_some(string_format, fd)

    if not isinstance(retval[0], str):
        return (retval[0].decode("utf8"),)

    return retval
......@@ -352,9 +365,9 @@ def unpack_array(shape, dtype, fd):
"""
# reads the actual array shape: remember, the declaration may have zeros
shape_ = read_some(ENDIANNESS + str(len(shape)) + 'Q', fd)
shape_ = read_some(ENDIANNESS + str(len(shape)) + "Q", fd)
if hasattr(dtype, 'type') and issubclass(dtype.type, numpy.generic):
if hasattr(dtype, "type") and issubclass(dtype.type, numpy.generic):
# N.B.: this bit of code is optimized to reduce memory usage
data_format = ENDIANNESS + dtype.str[1:]
count = numpy.prod(shape_)
......@@ -395,10 +408,10 @@ def unpack_scalar(dtype, fd):
"""
if hasattr(dtype, 'type') and issubclass(dtype.type, numpy.generic):
if hasattr(dtype, "type") and issubclass(dtype.type, numpy.generic):
if issubclass(dtype.type, numpy.complexfloating): # complex
data_format = ENDIANNESS + '2' + BINCODE[dtype]
data_format = ENDIANNESS + "2" + BINCODE[dtype]
a = read_some(data_format, fd)
return dtype.type(complex(a[0], a[1]))
......@@ -431,9 +444,9 @@ class baseformat(object):
def __init__(self, **kwargs):
    """Initializes the object from keyword arguments

    The keyword arguments are handed to ``from_dict`` as the attribute
    dictionary, with ``"unsafe"`` casting and ``add_defaults`` enabled.
    """

    self.from_dict(kwargs, casting="unsafe", add_defaults=True)
def from_dict(self, data, casting='safe', add_defaults=False):
def from_dict(self, data, casting="safe", add_defaults=False):
"""Same as initializing the object, but with a less strict type casting
Construction is, by default, set to using an **unsafe** data type
......@@ -459,7 +472,8 @@ class baseformat(object):
"""
if data is None: data = {}
if data is None:
data = {}
user_attributes = set([k for k in data.keys() if k != "__type__"])
declared_attributes = set(self._format.keys())
......@@ -468,25 +482,29 @@ class baseformat(object):
# in this case, the user must provide all attributes
if user_attributes != declared_attributes:
undeclared_attributes = declared_attributes - user_attributes
raise AttributeError("missing attributes (%s) for dataformat " \
"`%s' which require `%s'" % (
', '.join(undeclared_attributes),
self._name,
', '.join(declared_attributes),
),
)
raise AttributeError(
"missing attributes (%s) for dataformat "
"`%s' which require `%s'"
% (
", ".join(undeclared_attributes),
self._name,
", ".join(declared_attributes),
),
)
iterate_attributes = user_attributes
else: # then, the user passed attributes must be a subset
if not user_attributes.issubset(declared_attributes):
unknown_attributes = user_attributes - declared_attributes
raise AttributeError("unexpected attribute (%s) for dataformat " \
"`%s' which require `%s'" % (
', '.join(unknown_attributes),
self._name,
', '.join(declared_attributes),
),
)
raise AttributeError(
"unexpected attribute (%s) for dataformat "
"`%s' which require `%s'"
% (
", ".join(unknown_attributes),
self._name,
", ".join(declared_attributes),
),
)
iterate_attributes = declared_attributes
for k in iterate_attributes:
......@@ -505,15 +523,16 @@ class baseformat(object):
if isinstance(self._format[key], list):
dtype = getattr(self.__class__, key)[-1]
if (hasattr(dtype, 'type') and issubclass(dtype.type, numpy.generic)) \
or dtype is str:
if (
hasattr(dtype, "type") and issubclass(dtype.type, numpy.generic)
) or dtype is str:
retval[key] = v
else: # it is an array of dataformat objects
retval[key] = numpy.frompyfunc(lambda x: x.as_dict(), 1, 1)(v)
retval[key] = retval[key].tolist()
else:
retval[key] = v if not hasattr(v, 'as_dict') else v.as_dict()
retval[key] = v if not hasattr(v, "as_dict") else v.as_dict()
return retval
......@@ -528,8 +547,10 @@ class baseformat(object):
for key in sorted(self._format.keys()):
dtype = getattr(self.__class__, key)
value = getattr(self, key)
if isinstance(dtype, list): pack_array(dtype[-1], value, fd)
else: pack_scalar(dtype, value, fd)
if isinstance(dtype, list):
pack_array(dtype[-1], value, fd)
else:
pack_scalar(dtype, value, fd)
def pack(self):
"""Creates a binary representation of this object as a string
......@@ -564,7 +585,6 @@ class baseformat(object):
return self
def unpack(self, s):
"""Loads a binary representation of this object from a string
......@@ -574,7 +594,6 @@ class baseformat(object):
return self.unpack_from(six.BytesIO(s))
def isclose(self, other, *args, **kwargs):
"""Tests for closeness in the numerical sense.
......@@ -592,7 +611,8 @@ class baseformat(object):
"""
if not isinstance(other, self.__class__): return False
if not isinstance(other, self.__class__):
return False
for key in sorted(self._format.keys()):
......@@ -603,24 +623,31 @@ class baseformat(object):
if isinstance(dtype, list):
dtype = dtype[-1]
if hasattr(dtype, 'type'): # numpy array
if hasattr(dtype, "type"): # numpy array
# note: avoid numpy.all(numpy.isclose()) for arrays
# see bug https://github.com/numpy/numpy/issues/2280
if not numpy.allclose(this, that, *args, **kwargs):
return False
elif issubclass(dtype, six.string_types): # simple string
if not numpy.all(this == that): return False
else: #baseformat
isclose = numpy.frompyfunc(lambda x,y: x.isclose(y, *args, **kwargs), 2, 1)
if not numpy.all(isclose(this, that)): return False
if not numpy.all(this == that):
return False
else: # baseformat
isclose = numpy.frompyfunc(
lambda x, y: x.isclose(y, *args, **kwargs), 2, 1
)
if not numpy.all(isclose(this, that)):
return False
else:
if hasattr(dtype, 'type'): # numpy scalar
if not numpy.isclose(this, that, *args, **kwargs): return False
if hasattr(dtype, "type"): # numpy scalar
if not numpy.isclose(this, that, *args, **kwargs):
return False
elif issubclass(dtype, six.string_types): # simple string
if this != that: return False
if this != that:
return False
else: # baseformat
if not this.isclose(that, *args, **kwargs): return False
if not this.isclose(that, *args, **kwargs):
return False
# if you survived to this point, the objects are close
return True
......@@ -641,15 +668,28 @@ class baseformat(object):
dtype = getattr(self.__class__, key)
if isinstance(dtype, list):
value = setup_array(self._name, key, dtype[:-1], dtype[-1], value,
casting=casting, add_defaults=add_defaults)
value = setup_array(
self._name,
key,
dtype[:-1],
dtype[-1],
value,
casting=casting,
add_defaults=add_defaults,
)
else:
value = setup_scalar(self._name, key, dtype, value,
casting=casting, add_defaults=add_defaults)
value = setup_scalar(
self._name,
key,
dtype,
value,
casting=casting,
add_defaults=add_defaults,
)
return object.__setattr__(self, key, value)
def __setattr__(self, key, value):
    """Set an attribute, with validation"""

    # NOTE(review): the two positional arguments are presumably
    # ``casting="safe"`` and ``add_defaults=False`` -- confirm against the
    # signature of ``_setattr``
    return self._setattr(key, value, "safe", False)
......@@ -42,23 +42,24 @@ data
Data I/O classes and functions
"""
import abc
import glob
import logging
import os
import re
import glob
import simplejson as json
import time
import abc
import zmq
import logging
import six
from functools import reduce
from collections import namedtuple
from functools import reduce
import simplejson as json
import six
import zmq
from .hash import hashFileContents
from .dataformat import DataFormat
from .algorithm import Algorithm
from .dataformat import DataFormat
from .exceptions import RemoteException
from .hash import hashFileContents
logger = logging.getLogger(__name__)
......@@ -107,8 +108,8 @@ def mixDataIndices(list_of_data_indices):
for index in range(start, end + 1):
done = False
for l in list_of_data_indices:
for indices in l:
for data_indices in list_of_data_indices:
for indices in data_indices:
if indices[1] == index:
result.append((current_start, index))
current_start = index + 1
......
......@@ -46,7 +46,6 @@ import six
from .data import mixDataIndices
# ----------------------------------------------------------
......
......@@ -42,23 +42,22 @@ database
Validation of databases
"""
import itertools
import os
import sys
import six
import simplejson as json
import itertools
import numpy as np
from collections import namedtuple
import numpy as np
import simplejson as json
import six
from . import loader
from . import utils
from .protocoltemplate import ProtocolTemplate
from .dataformat import DataFormat
from .outputs import OutputList
from .exceptions import OutputError
from .outputs import OutputList
from .protocoltemplate import ProtocolTemplate
# ----------------------------------------------------------
......
......@@ -42,17 +42,16 @@ dataformat
Validation and parsing for dataformats
"""
import re
import copy
import re
import six
import numpy
import simplejson as json
import six
from . import utils
from .baseformat import baseformat
# ----------------------------------------------------------
......
......@@ -42,13 +42,13 @@ hash
Various functions for hashing platform contributions and others
"""
import hashlib
import simplejson
import collections
import copy
import six
import hashlib
import os
import simplejson
import six
# ----------------------------------------------------------
......@@ -59,9 +59,9 @@ def _sha256(s):
"""
if isinstance(s, six.string_types):
try:
s = six.u(s).encode('utf-8')
s = six.u(s).encode("utf-8")
except Exception:
s = s.encode('utf-8')
s = s.encode("utf-8")
return hashlib.sha256(s).hexdigest()
......@@ -71,14 +71,14 @@ def _sha256(s):
def _stringify(dictionary):
    """Converts ``dictionary`` into a compact, key-sorted textual form

    The result looks like ``{"key1":value1,"key2":value2}``: keys are visited
    in sorted order and written between double quotes, and each value is
    converted with ``str()``. No escaping is applied, so the output is not
    guaranteed to be valid JSON. An empty dictionary gives ``{}``.
    """

    # join() yields exactly the same text as appending '"k":v,' for every
    # key and then stripping the trailing comma
    entries = ",".join(
        '"%s":%s' % (name, str(dictionary[name])) for name in sorted(dictionary)
    )
    return "{" + entries + "}"
......@@ -87,13 +87,13 @@ def _stringify(dictionary):
def _compact(text):
    """Removes every space and newline character from ``text``

    Other whitespace (e.g. tabs) is left untouched.
    """

    return text.replace(" ", "").replace("\n", "")
# ----------------------------------------------------------
def toPath(hash, suffix='.data'):
def toPath(hash, suffix=".data"):
""" Returns the path on disk which corresponds to the hash given.
Parameters:
......@@ -159,7 +159,7 @@ def hashJSON(contents, description):
contents = copy.deepcopy(contents) # temporary copy
del contents[description]
contents = simplejson.dumps(contents, sort_keys=True)
return hashlib.sha256(contents.encode('utf-8')).hexdigest()
return hashlib.sha256(contents.encode("utf-8")).hexdigest()
# ----------------------------------------------------------
......@@ -175,11 +175,14 @@ def hashJSONFile(path, description):
"""
try:
with open(path, 'rb') as f:
with open(path, "rb") as f:
# preserve order
return hashJSON(simplejson.loads(f.read().decode('utf-8'),
object_pairs_hook=collections.OrderedDict),
description)
return hashJSON(
simplejson.loads(
f.read().decode("utf-8"), object_pairs_hook=collections.OrderedDict
),
description,
)
except simplejson.JSONDecodeError:
# falls back to normal file content hashing
return hashFileContents(path)
......@@ -195,9 +198,9 @@ def hashFileContents(path):