Commit 9b9d4f3e authored by Philip ABBET's avatar Philip ABBET
Browse files

Refactoring: Move the 'DataFormat' class into beat.backend.python

parent 6e65602f
......@@ -29,11 +29,9 @@
"""Validation and parsing for dataformats"""
import os
import re
import copy
import six
import numpy
import simplejson
from . import schema
......@@ -41,37 +39,13 @@ from . import prototypes
from . import utils
from .baseformat import baseformat
class Storage(utils.Storage):
"""Resolves paths for dataformats
from beat.backend.python.dataformat import Storage
from beat.backend.python.dataformat import DataFormat as BackendDataFormat
prefix (str): Establishes the prefix of your installation.
name (str): The name of the dataformat object in the format
def __init__(self, prefix, name):
    """Sets up path resolution for the dataformat called ``name``

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      name (str): The full name of the dataformat. It must contain exactly
        two ``/`` separators, i.e. three components.

    Raises:

      RuntimeError: if ``name`` does not contain exactly two ``/``.

    """

    if name.count('/') != 2:
        raise RuntimeError("invalid dataformat name: `%s'" % name)

    # NOTE(review): the middle unpacking target was missing from the text
    # (``self.username,, self.version``), which is a syntax error. It is
    # restored here as ``self.name`` -- confirm against the upstream file.
    self.username, self.name, self.version = name.split('/')
    self.fullname = name

    # the base class is initialised with the path picked by
    # ``utils.hashed_or_simple`` for the 'dataformats' resource kind
    path = utils.hashed_or_simple(prefix, 'dataformats', name)
    super(Storage, self).__init__(path)
def hash(self):
    """The 64-character hash of the dataformat declaration JSON

    The value is obtained from the parent class' ``hash`` method, which is
    called with ``'#description'`` as its only argument.
    """

    parent_storage = super(Storage, self)
    return parent_storage.hash('#description')
class DataFormat(object):
"""Data formats define the chunks of data that circulate at data formats.
class DataFormat(BackendDataFormat):
"""Data formats define the chunks of data that circulate between blocks.
......@@ -127,25 +101,8 @@ class DataFormat(object):
def __init__(self, prefix, data, parent=None, dataformat_cache=None):
super(DataFormat, self).__init__(prefix, data, parent, dataformat_cache)
self._name = None = None
self.resolved = None
self.prefix = prefix
self.errors = [] = None
self.resolved = None
self.referenced = {}
self.parent = parent
# if the user has not provided a cache, still use one for performance
dataformat_cache = dataformat_cache if dataformat_cache is not None else {}
self._load(data, dataformat_cache)
if self._name is not None: #registers it into the cache, even if failed
dataformat_cache[self._name] = self
def _load(self, data, dataformat_cache):
"""Loads the dataformat"""
......@@ -264,217 +221,13 @@ class DataFormat(object):
# all references are resolved at this point and the final model is built
# you can lookup the original data in ```` and the final model
# in ``self.resolved``.
if self.errors: self.errors = utils.uniq(self.errors)
def name(self):
    """Returns the name of this object, either from the filename or composed
    from the hierarchy it belongs.

    Returns:

      str: ``self._name`` when it is set. When it is ``None`` and a parent is
      available, the name is composed as
      ``<parent[0].name>.<parent[1]>_type``. When neither applies,
      ``'__unnamed_dataformat__'`` is returned.

    NOTE(review): ``self.parent[0].name`` is concatenated with a string
    below, which implies ``name`` is exposed as a property; the decorator is
    not visible in this text -- confirm.
    """

    # parent is indexed as a pair: [0] is the owning object (it has a
    # ``name``), [1] is a string that gets the '_type' suffix
    if self.parent and self._name is None:
        return self.parent[0].name + '.' + self.parent[1] + '_type'

    return self._name or '__unnamed_dataformat__'
def schema_version(self):
"""Returns the schema version"""
return'#schema_version', 1)
def name(self, value):
self._name = value = Storage(self.prefix, value)
def extends(self):
"""If this dataformat extends another one, this is it, otherwise ``None``
def type(self):
"""Returns a new type that can create instances of this dataformat.
The new returned type provides a basis to construct new objects which
represent the dataformat. It provides a simple JSON serializer and a
for-screen representation.
To create an object respecting the data format from a JSON descriptor, use
the following technique:
.. code-block:: python
ftype = dataformat(...).type
json = simplejson.loads(...)
newobj = ftype(**json) # instantiates the new object, checks format
To dump the object into JSON, use the following technique:
.. code-block:: python
simplejson.dumps(newobj.as_dict(), indent=4)
A string representation of the object uses the technique above to
pretty-print the object contents to the screen.
if self.resolved is None:
raise RuntimeError("Cannot prototype while not properly initialized")
classname = re.sub(r'[-/]', '_',
if not isinstance(classname, str): classname = str(classname)
def init(self, **kwargs): baseformat.__init__(self, **kwargs)
attributes = dict(
# create the converters for the class we're about to return
for k, v in self.resolved.items():
if isinstance(v, list): #it is an array
attributes[k] = copy.deepcopy(v)
if isinstance(v[-1], DataFormat):
attributes[k][-1] = v[-1].type
if v[-1] in ('string', 'str'):
attributes[k][-1] = str
attributes[k][-1] = numpy.dtype(v[-1])
elif isinstance(v, DataFormat): #it is another dataformat
attributes[k] = v.type
else: #it is a simple type
if v in ('string', 'str'):
attributes[k] = str
attributes[k] = numpy.dtype(v)
return type(
def valid(self):
    """Tells if this object has no errors registered

    Returns:

      bool: ``True`` when ``self.errors`` is empty, ``False`` otherwise.

    """

    # ``not`` already yields a bool; no explicit ``bool()`` cast is needed
    return not self.errors
def validate(self, data):
"""Validates a piece of data provided by the user
In order to validate, the data object must be complete and safe-castable to
this dataformat. For any other validation operation that would require
special settings, use instead the :py:meth:`type` method to generate a
valid type and use either ``from_dict``, ``unpack`` or ``unpack_from``
depending on your use-case.
data (dict, str, fd): This parameter represents the data to be validated.
It may be a dictionary with the JSON representation of a data blob or,
else, a binary blob (represented by either a string or a file
descriptor object) from which the data will be read. If problems occur,
an exception is raised.
``None``: Raises if an error occurs.
obj = self.type()
if isinstance(data, dict):
obj.from_dict(data, casting='safe', add_defaults=False)
elif isinstance(data, six.string_types):
def isparent(self, other):
    """Tells if the other object extends self (directly or indirectly).

    Parameters:

      other (DataFormat): another object to check

    Returns:

      bool: ``True``, if ``self`` is a parent of ``other``, i.e. ``other``
      extends ``self`` either directly or through the chain of formats it
      extends. ``False`` otherwise.

    """

    # an object that extends nothing has no parents at all
    if not other.extends:
        return False

    # NOTE(review): the left operand of this comparison was missing from the
    # text (``if == other.extends``). It is restored here as ``self.name``
    # -- confirm against the upstream file.
    if self.name == other.extends:
        return True

    # not a direct parent: climb one level, using the object that ``other``
    # extends as looked up in ``other.referenced``
    return self.isparent(other.referenced[other.extends])
def description(self):
"""The short description for this object"""
return'#description', None)
def description(self, value):
"""Sets the short description for this object"""['#description'] = value
def documentation(self):
"""The full-length description for this object"""
if not self._name:
raise RuntimeError("dataformat has no name")
return None
def documentation(self, value):
"""Sets the full-length description for this object"""
if not self._name:
raise RuntimeError("dataformat has no name")
if hasattr(value, 'read'):
def hash(self):
"""Returns the hexadecimal hash for its declaration"""
if not self._name:
raise RuntimeError("dataformat has no name")
if self.errors:
self.errors = utils.uniq(self.errors)
def json_dumps(self, indent=4):
"""Dumps the JSON declaration of this object in a string
indent (int): The number of indentation spaces at every indentation level
......@@ -95,12 +95,6 @@ def toPath(hash, suffix='.data'):
def toUserPath(username):
    """Builds the relative, sharded path used for a given user

    The path has three components: the first two characters of
    ``_sha256(username)``, the next two characters of the same value, and
    finally ``username`` itself.

    Parameters:

      username (str): the user name to build the path for

    Returns:

      str: the three components joined with :py:func:`os.path.join`

    """

    digest = _sha256(username)
    shard_top, shard_sub = digest[0:2], digest[2:4]
    return os.path.join(shard_top, shard_sub, username)
def hashJSONStr(contents, description):
"""Hashes the JSON string contents using :py:func:`hashlib.sha256`
......@@ -44,17 +44,6 @@ def temporary_directory(prefix='beat_'):
def hashed_or_simple(prefix, what, path, suffix='.json'):
    """Returns a hashed path or simple path depending on where the resource is

    ``path`` is split at its first ``/`` into a user name and a remainder.
    The hashed candidate is ``prefix/what/<hash.toUserPath(username)>/<remainder>``
    and it is chosen only if ``candidate + suffix`` exists on disk. In every
    other case the simple ``prefix/what/path`` is returned. Neither returned
    value includes ``suffix``.

    Parameters:

      prefix (str): first component of the returned path

      what (str): second component of the returned path

      path (str): the resource path; it must contain at least one ``/``

      suffix (str): appended to the hashed candidate for the existence check
        only

    Returns:

      str: the hashed candidate if it exists with ``suffix``, else the simple
      path

    """

    username, remainder = path.split('/', 1)

    # NOTE(review): ``hash`` must be an object providing ``toUserPath`` --
    # presumably a sibling module imported at file level, not the builtin
    user_dir = hash.toUserPath(username)
    hashed = os.path.join(prefix, what, user_dir, remainder)

    if not os.path.exists(hashed + suffix):
        return os.path.join(prefix, what, path)

    return hashed
class NumpyJSONEncoder(simplejson.JSONEncoder):
"""Encodes numpy arrays and scalars
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment