From aa4eff11d058407387e44782d0f48edd5da7307d Mon Sep 17 00:00:00 2001
From: Philip Abbet <philip.abbet@idiap.ch>
Date: Fri, 17 Mar 2017 10:50:21 +0100
Subject: [PATCH] More refactoring

---
Review notes (not part of the commit message):

 * hash.py: the added functions use copy, collections, simplejson and six,
   so these modules are now imported next to hashlib. hashJSON() encodes
   the JSON text to UTF-8 before hashing, since hashlib only accepts bytes
   on Python 3. _sha256() no longer uses a bare ``except``.
 * utils.py: CodeStorage now derives from Storage, because it delegates to
   ``super().save()`` and ``super().remove()``. The language auto-discovery
   accepts a declaration that is already a loaded object. The docstrings of
   Storage and CodeStorage document the ``path`` argument they really take.
 * Hunk sizes and the summary below were updated accordingly; the blob ids
   on the ``index`` lines are still those of the original commit.

 beat/backend/python/database.py    |  67 +++++++-
 beat/backend/python/dbexecution.py |   4 +-
 beat/backend/python/hash.py        |  70 +++++++
 beat/backend/python/utils.py       | 264 +++++++++++++++++++++++++++++
 4 files changed, 396 insertions(+), 9 deletions(-)
 create mode 100644 beat/backend/python/utils.py

diff --git a/beat/backend/python/database.py b/beat/backend/python/database.py
index f4114bd..c937a36 100644
--- a/beat/backend/python/database.py
+++ b/beat/backend/python/database.py
@@ -30,17 +30,40 @@
 
 import os
 import sys
-# import collections
 
 import six
 import simplejson
 
 from . import loader
+from . import utils
+
+from .dataformat import DataFormat
+
+
+
+class Storage(utils.CodeStorage):
+  """Resolves paths for databases
+
+  Parameters:
+
+    prefix (str): Establishes the prefix of your installation.
+
+    name (str): The name of the database object in the format
+      ``<name>/<version>``.
+
+  """
+
+  def __init__(self, prefix, name):
+
+    if name.count('/') != 1:
+      raise RuntimeError("invalid database name: `%s'" % name)
+
+    self.name, self.version = name.split('/')
+    self.fullname = name
+
+    path = os.path.join(prefix, 'databases', name)
+    super(Storage, self).__init__(path, 'python') #views are coded in Python
 
-# from . import dataformat
-# from . import hash
-# from . import utils
-# from . import prototypes
 
 
 class View(object):
@@ -207,7 +230,9 @@ class Database(object):
     self._name = None
     self.prefix = prefix
     self.dataformats = {} # preloaded dataformats
+    self.storage = None
 
+    self.errors = []
     self.data = None
 
     # if the user has not provided a cache, still use one for performance
@@ -220,8 +245,31 @@ class Database(object):
     """Loads the database"""
 
     self._name = data
-    json_path = os.path.join(prefix, 'databases', name + '.json')
-    with open(json_path, 'rb') as f: self.data = simplejson.load(f)
+
+    self.storage = Storage(self.prefix, self._name)
+    json_path = self.storage.json.path
+    if not self.storage.json.exists():
+      self.errors.append('Database declaration file not found: %s' % json_path)
+      return
+
+    with open(json_path, 'rb') as f:
+      self.data = simplejson.load(f)
+
+    for protocol in self.data['protocols']:
+      for _set in protocol['sets']:
+
+        for key, value in _set['outputs'].items():
+
+          if value in self.dataformats:
+            continue
+
+          if value in dataformat_cache:
+            dataformat = dataformat_cache[value]
+          else:
+            dataformat = DataFormat(self.prefix, value)
+            dataformat_cache[value] = dataformat
+
+          self.dataformats[value] = dataformat
 
 
   @property
@@ -237,6 +285,11 @@ class Database(object):
     return self.data.get('schema_version', 1)
 
 
+  @property
+  def valid(self):
+    return not bool(self.errors)
+
+
   @property
   def protocols(self):
     """The declaration of all the protocols of the database"""
diff --git a/beat/backend/python/dbexecution.py b/beat/backend/python/dbexecution.py
index 899a699..c82e82f 100644
--- a/beat/backend/python/dbexecution.py
+++ b/beat/backend/python/dbexecution.py
@@ -119,10 +119,10 @@ class DBExecutor(object):
     database_cache = database_cache if database_cache is not None else {}
     self.dataformat_cache = dataformat_cache if dataformat_cache is not None else {}
 
-    self._load(data, self.dataformat_cache, database_cache)
+    self._load(data, database_cache)
 
 
-  def _load(self, data, dataformat_cache, database_cache):
+  def _load(self, data, database_cache):
     """Loads the block execution information"""
 
     # reset
diff --git a/beat/backend/python/hash.py b/beat/backend/python/hash.py
index 82c3f78..7b7b815 100644
--- a/beat/backend/python/hash.py
+++ b/beat/backend/python/hash.py
@@ -32,6 +32,76 @@
 import hashlib
+import copy
+import collections
+
+import simplejson
+import six
 
 
+def _sha256(s):
+  """A python2/3 replacement for :py:func:`hashlib.sha256`"""
+
+  try:
+    if isinstance(s, str): s = six.u(s)
+    return hashlib.sha256(s.encode('utf8')).hexdigest()
+  except Exception: #not encodable (e.g. already bytes): hash the object as is
+    return hashlib.sha256(s).hexdigest()
+
+
+
+def _stringify(dictionary):
+  names = sorted(dictionary.keys())
+
+  converted_dictionary = '{'
+  for name in names:
+    converted_dictionary += '"%s":%s,' % (name, str(dictionary[name]))
+
+  if len(converted_dictionary) > 1:
+    converted_dictionary = converted_dictionary[:-1]
+
+  converted_dictionary += '}'
+
+  return converted_dictionary
+
+
+
+def hash(dictionary_or_string):
+  if isinstance(dictionary_or_string, dict):
+    return _sha256(_stringify(dictionary_or_string))
+  else:
+    return _sha256(dictionary_or_string)
+
+
+
+def hashJSON(contents, description):
+  """Hashes the pre-loaded JSON object using :py:func:`hashlib.sha256`
+
+  Excludes description changes
+  """
+
+  if description in contents:
+    contents = copy.deepcopy(contents) #temporary copy
+    del contents[description]
+  contents = simplejson.dumps(contents, sort_keys=True)
+  return hashlib.sha256(contents.encode('utf-8')).hexdigest() #hashlib needs bytes
+
+
+
+def hashJSONFile(path, description):
+  """Hashes the JSON file contents using :py:func:`hashlib.sha256`
+
+  Excludes description changes
+  """
+
+  try:
+    with open(path, 'rb') as f:
+      return hashJSON(simplejson.load(f,
+        object_pairs_hook=collections.OrderedDict), description) #preserve order
+  except simplejson.JSONDecodeError:
+    # falls back to normal file content hashing
+    return hashFileContents(path)
+
+
+
 def hashFileContents(path):
   """Hashes the file contents using :py:func:`hashlib.sha256`."""
 
diff --git a/beat/backend/python/utils.py b/beat/backend/python/utils.py
new file mode 100644
index 0000000..4dd78ca
--- /dev/null
+++ b/beat/backend/python/utils.py
@@ -0,0 +1,264 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+
+###############################################################################
+#                                                                             #
+# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
+# Contact: beat.support@idiap.ch                                              #
+#                                                                             #
+# This file is part of the beat.core module of the BEAT platform.             #
+#                                                                             #
+# Commercial License Usage                                                    #
+# Licensees holding valid commercial BEAT licenses may use this file in       #
+# accordance with the terms contained in a written agreement between you      #
+# and Idiap. For further information contact tto@idiap.ch                     #
+#                                                                             #
+# Alternatively, this file may be used under the terms of the GNU Affero      #
+# Public License version 3 as published by the Free Software and appearing    #
+# in the file LICENSE.AGPL included in the packaging of this file.            #
+# The BEAT platform is distributed in the hope that it will be useful, but    #
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
+# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
+#                                                                             #
+# You should have received a copy of the GNU Affero Public License along      #
+# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
+#                                                                             #
+###############################################################################
+
+
+import os
+import shutil
+import collections
+
+import simplejson
+
+import six
+
+from . import hash
+
+
+def safe_rmfile(f):
+  """Safely removes a file from the disk"""
+
+  if os.path.exists(f): os.unlink(f)
+
+
+
+def safe_rmdir(f):
+  """Safely removes the directory containing a given file from the disk"""
+
+  d = os.path.dirname(f)
+  if not os.path.exists(d): return
+  if not os.listdir(d): os.rmdir(d)
+
+
+
+def extension_for_language(language):
+  """Returns the preferred extension for a given programming language
+
+  The set of languages supported must match those declared in our
+  ``common.json`` schema.
+
+  Parameters:
+
+    language (str) The language for which you'd like to get the extension for.
+
+
+  Returns:
+
+    str: The extension for the given language, including a leading ``.`` (dot)
+
+
+  Raises:
+
+    KeyError: If the language is not defined in our internal dictionary.
+
+  """
+
+  return dict(
+          unknown = '',
+          cxx = '.so',
+          matlab = '.m',
+          python = '.py',
+          r = '.r',
+         )[language]
+
+
+
+class File(object):
+  """User helper to read and write file objects"""
+
+
+  def __init__(self, path, binary=False):
+
+    self.path = path
+    self.binary = binary
+
+
+  def exists(self):
+
+    return os.path.exists(self.path)
+
+
+  def load(self):
+
+    mode = 'rb' if self.binary else 'rt'
+    with open(self.path, mode) as f: return f.read()
+
+
+  def try_load(self):
+
+    if os.path.exists(self.path):
+      return self.load()
+    return None
+
+
+  def backup(self):
+
+    if not os.path.exists(self.path): return #no point in backing-up
+    backup = self.path + '~'
+    if os.path.exists(backup): os.remove(backup)
+    shutil.copy(self.path, backup)
+
+
+  def save(self, contents):
+
+    d = os.path.dirname(self.path)
+    if not os.path.exists(d): os.makedirs(d)
+
+    if os.path.exists(self.path): self.backup()
+
+    mode = 'wb' if self.binary else 'wt'
+    with open(self.path, mode) as f: f.write(contents)
+
+
+  def remove(self):
+
+    safe_rmfile(self.path)
+    safe_rmfile(self.path + '~') #backup
+    safe_rmdir(self.path) #remove containing directory
+
+
+
+class Storage(object):
+  """Resolves paths for objects that provide only a description
+
+  Parameters:
+
+    path (str): The path to the object, without any extension. The JSON
+      declaration is looked up at ``<path>.json`` and the documentation at
+      ``<path>.rst``.
+
+  """
+
+  def __init__(self, path):
+
+    self.path = path
+    self.json = File(self.path + '.json')
+    self.doc = File(self.path + '.rst')
+
+  def hash(self, description='description'):
+    """The 64-character hash of the database declaration JSON"""
+    return hash.hashJSONFile(self.json.path, description)
+
+  def exists(self):
+    """If the database declaration file exists"""
+    return self.json.exists()
+
+  def load(self):
+    """Loads the JSON declaration as a file"""
+    tp = collections.namedtuple('Storage', ['declaration', 'description'])
+    return tp(self.json.load(), self.doc.try_load())
+
+  def save(self, declaration, description=None):
+    """Saves the JSON declaration as files"""
+    if description: self.doc.save(description.encode('utf8'))
+    if not isinstance(declaration, six.string_types):
+      declaration = simplejson.dumps(declaration, indent=4)
+    self.json.save(declaration)
+
+  def remove(self):
+    """Removes the object from the disk"""
+    self.json.remove()
+    self.doc.remove()
+
+
+
+class CodeStorage(Storage):
+  """Resolves paths for objects that provide a description and code
+
+  Parameters:
+
+    path (str): The path to the object, without any extension. The JSON
+      declaration is looked up at ``<path>.json``, the documentation at
+      ``<path>.rst`` and the code at ``<path>`` followed by the extension
+      matching ``language``.
+
+    language (str): One of the valid programming languages. If not set, it is
+      read from the JSON declaration (``unknown`` if that file cannot be read).
+
+  """
+
+  def __init__(self, path, language=None):
+
+    super(CodeStorage, self).__init__(path)
+
+    self._language = language or self.__auto_discover_language()
+    self.code = File(self.path + \
+            extension_for_language(self._language), binary=True)
+
+  def __auto_discover_language(self, json=None):
+    """Discovers and sets the language from its own JSON descriptor"""
+    try:
+      text = json or self.json.load()
+      if isinstance(text, six.string_types): #may also be a pre-loaded object
+        text = simplejson.loads(text)
+      return text['language']
+    except IOError:
+      return 'unknown'
+
+  @property
+  def language(self):
+    return self._language
+
+  @language.setter
+  def language(self, value):
+    self._language = value
+    self.code = File(self.path + extension_for_language(self._language),
+            binary=True)
+
+  def hash(self):
+    """The 64-character hash of the database declaration JSON"""
+
+    if self.code.exists():
+      return hash.hash(dict(
+        json=hash.hashJSONFile(self.json.path, 'description'),
+        code=hash.hashFileContents(self.code.path),
+        ))
+    else:
+      return hash.hash(dict(
+        json=hash.hashJSONFile(self.json.path, 'description'),
+        ))
+
+  def exists(self):
+    """If the database declaration file exists"""
+    return self.json.exists() and self.code.exists()
+
+  def load(self):
+    """Loads the JSON declaration as a file"""
+    tp = collections.namedtuple('CodeStorage',
+            ['declaration', 'code', 'description'])
+    return tp(self.json.load(), self.code.try_load(), self.doc.try_load())
+
+  def save(self, declaration, code=None, description=None):
+    """Saves the JSON declaration and the code as files"""
+    super(CodeStorage, self).save(declaration, description)
+    if code:
+      if self._language == 'unknown':
+        self.language = self.__auto_discover_language(declaration)
+      self.code.save(code)
+
+  def remove(self):
+    """Removes the object from the disk"""
+    super(CodeStorage, self).remove()
+    self.code.remove()
-- 
GitLab