From aa4eff11d058407387e44782d0f48edd5da7307d Mon Sep 17 00:00:00 2001
From: Philip Abbet <philip.abbet@idiap.ch>
Date: Fri, 17 Mar 2017 10:50:21 +0100
Subject: [PATCH] More refactoring

---
 beat/backend/python/database.py    |  67 +++++++-
 beat/backend/python/dbexecution.py |   4 +-
 beat/backend/python/hash.py        |  65 +++++++
 beat/backend/python/utils.py       | 265 +++++++++++++++++++++++++++++
 4 files changed, 392 insertions(+), 9 deletions(-)
 create mode 100644 beat/backend/python/utils.py

diff --git a/beat/backend/python/database.py b/beat/backend/python/database.py
index f4114bd..c937a36 100644
--- a/beat/backend/python/database.py
+++ b/beat/backend/python/database.py
@@ -30,17 +30,40 @@
 
 import os
 import sys
-# import collections
 
 import six
 import simplejson
 
 from . import loader
+from . import utils
+
+from .dataformat import DataFormat
+
+
+
+class Storage(utils.CodeStorage):
+  """Resolves paths for databases (their views are coded in Python)
+
+  Parameters:
+
+    prefix (str): Root directory of the installation.
+
+    name (str): Database identifier, formatted as ``<name>/<version>``.
+
+  Raises:
+
+    RuntimeError: If ``name`` does not contain exactly one ``/``.
+
+  """
+
+  def __init__(self, prefix, name):
+    pieces = name.split('/')
+    if len(pieces) != 2:
+      raise RuntimeError("invalid database name: `%s'" % name)
+    self.name, self.version = pieces
+    self.fullname = name
+    super(Storage, self).__init__(os.path.join(prefix, 'databases', name), 'python')
 
-# from . import dataformat
-# from . import hash
-# from . import utils
-# from . import prototypes
 
 
 class View(object):
@@ -207,7 +230,9 @@ class Database(object):
     self._name = None
     self.prefix = prefix
     self.dataformats = {} # preloaded dataformats
+    self.storage = None
 
+    self.errors = []
     self.data = None
 
     # if the user has not provided a cache, still use one for performance
@@ -220,8 +245,31 @@ class Database(object):
     """Loads the database"""
 
     self._name = data
-    json_path = os.path.join(prefix, 'databases', name + '.json')
-    with open(json_path, 'rb') as f: self.data = simplejson.load(f)
+
+    self.storage = Storage(self.prefix, self._name)
+    json_path = self.storage.json.path
+    if not self.storage.json.exists():
+      self.errors.append('Database declaration file not found: %s' % json_path)
+      return
+
+    with open(json_path, 'rb') as f:
+      self.data = simplejson.load(f)
+
+    for protocol in self.data['protocols']:
+      for _set in protocol['sets']:
+
+        for key, value in _set['outputs'].items():
+
+          if value in self.dataformats:
+            continue
+
+          if value in dataformat_cache:
+            dataformat = dataformat_cache[value]
+          else:
+            dataformat = DataFormat(self.prefix, value)
+            dataformat_cache[value] = dataformat
+
+          self.dataformats[value] = dataformat
 
 
   @property
@@ -237,6 +285,11 @@ class Database(object):
     return self.data.get('schema_version', 1)
 
 
+  @property
+  def valid(self):
+    return not self.errors  # valid as long as no error has been recorded
+
+
   @property
   def protocols(self):
     """The declaration of all the protocols of the database"""
diff --git a/beat/backend/python/dbexecution.py b/beat/backend/python/dbexecution.py
index 899a699..c82e82f 100644
--- a/beat/backend/python/dbexecution.py
+++ b/beat/backend/python/dbexecution.py
@@ -119,10 +119,10 @@ class DBExecutor(object):
     database_cache = database_cache if database_cache is not None else {}
     self.dataformat_cache = dataformat_cache if dataformat_cache is not None else {}
 
-    self._load(data, self.dataformat_cache, database_cache)
+    self._load(data, database_cache)
 
 
-  def _load(self, data, dataformat_cache, database_cache):
+  def _load(self, data, database_cache):
     """Loads the block execution information"""
 
     # reset
diff --git a/beat/backend/python/hash.py b/beat/backend/python/hash.py
index 82c3f78..7b7b815 100644
--- a/beat/backend/python/hash.py
+++ b/beat/backend/python/hash.py
@@ -32,6 +32,71 @@
 import hashlib
 
 
+def _sha256(s):
+  """A python2/3 replacement for :py:func:`hashlib.sha256` (hex digest)"""
+  import six  # imported here so the helper does not rely on module imports
+  try:
+    text = six.u(s) if isinstance(s, str) else s
+    return hashlib.sha256(text.encode('utf8')).hexdigest()
+  except (AttributeError, UnicodeError):  # bytes (python3) or undecodable
+    return hashlib.sha256(s).hexdigest()
+
+
+
+def _stringify(dictionary):
+  """Serializes a dictionary into a compact, key-sorted string
+
+  The result looks like ``{"key1":value1,"key2":value2}``: keys are sorted and
+  double-quoted, values are rendered with :py:func:`str`, and no whitespace is
+  inserted. An empty dictionary gives ``{}``.
+
+  Returns:
+
+    str: The textual representation of ``dictionary``
+  """
+  pairs = ['"%s":%s' % (key, str(dictionary[key])) for key in sorted(dictionary)]
+  return '{' + ','.join(pairs) + '}'
+
+
+
+def hash(dictionary_or_string):
+  """Returns the SHA-256 hex digest of a string, or of a stringified dict"""
+  is_dict = isinstance(dictionary_or_string, dict)
+  payload = _stringify(dictionary_or_string) if is_dict else dictionary_or_string
+  return _sha256(payload)
+
+
+
+def hashJSON(contents, description):
+  """Hashes the pre-loaded JSON object using :py:func:`hashlib.sha256`
+
+  The ``description`` key, if present, is left out so that description-only
+  changes do not alter the hash. ``contents`` itself is never modified.
+  """
+  import simplejson  # local import: keeps this function self-contained
+  if description in contents:
+    contents = dict((k, v) for k, v in contents.items() if k != description)
+  dumped = simplejson.dumps(contents, sort_keys=True)
+  return hashlib.sha256(dumped.encode('utf8')).hexdigest()
+
+
+
+def hashJSONFile(path, description):
+  """Hashes the JSON file contents using :py:func:`hashlib.sha256`
+
+  Description changes are excluded (see :py:func:`hashJSON`); files that are
+  not valid JSON are hashed from their raw contents instead.
+  """
+  try:
+    with open(path, 'rb') as stream:
+      ordered = collections.OrderedDict  # preserve the order of the keys
+      parsed = simplejson.load(stream, object_pairs_hook=ordered)
+      return hashJSON(parsed, description)
+  except simplejson.JSONDecodeError:
+    return hashFileContents(path)  # falls back to normal file hashing
+
+
+
 def hashFileContents(path):
   """Hashes the file contents using :py:func:`hashlib.sha256`."""
 
diff --git a/beat/backend/python/utils.py b/beat/backend/python/utils.py
new file mode 100644
index 0000000..4dd78ca
--- /dev/null
+++ b/beat/backend/python/utils.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+
+###############################################################################
+#                                                                             #
+# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
+# Contact: beat.support@idiap.ch                                              #
+#                                                                             #
+# This file is part of the beat.backend.python module of the BEAT platform.   #
+#                                                                             #
+# Commercial License Usage                                                    #
+# Licensees holding valid commercial BEAT licenses may use this file in       #
+# accordance with the terms contained in a written agreement between you      #
+# and Idiap. For further information contact tto@idiap.ch                     #
+#                                                                             #
+# Alternatively, this file may be used under the terms of the GNU Affero      #
+# Public License version 3 as published by the Free Software and appearing    #
+# in the file LICENSE.AGPL included in the packaging of this file.            #
+# The BEAT platform is distributed in the hope that it will be useful, but    #
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
+# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
+#                                                                             #
+# You should have received a copy of the GNU Affero Public License along      #
+# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
+#                                                                             #
+###############################################################################
+
+
+import os
+import shutil
+import collections
+
+import simplejson
+
+import six
+
+from . import hash
+
+
+def safe_rmfile(f):
+  """Deletes file ``f`` from the disk; does nothing if it does not exist"""
+  if not os.path.exists(f): return
+  os.unlink(f)
+
+
+
+def safe_rmdir(f):
+  """Removes the directory containing file ``f``, but only if it is empty"""
+
+  parent = os.path.dirname(f)
+  if os.path.exists(parent) and not os.listdir(parent):
+    os.rmdir(parent)
+
+
+
+def extension_for_language(language):
+  """Maps a programming language to its preferred file extension
+
+  The set of languages supported must match those declared in our
+  ``common.json`` schema.
+
+  Parameters:
+
+    language (str): The language to look up: ``unknown``, ``cxx``,
+      ``matlab``, ``python`` or ``r``.
+
+  Returns:
+
+    str: The extension, including a leading ``.`` (dot); the ``unknown``
+    language gives an empty string.
+
+  Raises:
+
+    KeyError: If the language is not defined in our internal dictionary.
+
+  """
+  extensions = {
+          'unknown': '',
+          'cxx': '.so',
+          'matlab': '.m',
+          'python': '.py',
+          'r': '.r',
+          }
+  return extensions[language]
+
+
+
+class File(object):
+  """User helper to read and write file objects
+
+  Parameters:
+
+    path (str): Location of the file on the disk.
+
+    binary (bool): If ``True``, contents are read and written as bytes.
+
+  """
+
+  def __init__(self, path, binary=False):
+    self.path = path
+    self.binary = binary
+
+  def exists(self):
+    """Tells if the file is present on the disk"""
+    return os.path.exists(self.path)
+
+  def load(self):
+    """Returns the whole contents of the file"""
+    mode = 'rb' if self.binary else 'rt'
+    with open(self.path, mode) as f: return f.read()
+
+  def try_load(self):
+    """Like :py:meth:`load`, but returns ``None`` if the file is missing"""
+    if os.path.exists(self.path):
+      return self.load()
+    return None
+
+  def backup(self):
+    """Copies the file to ``<path>~``, replacing any previous backup"""
+    if not os.path.exists(self.path): return #no point in backing-up
+    backup = self.path + '~'
+    if os.path.exists(backup): os.remove(backup)
+    shutil.copy(self.path, backup)
+
+  def save(self, contents):
+    """Writes ``contents``, backing-up any version already on the disk"""
+    d = os.path.dirname(self.path)
+    # a bare file name has no directory part: nothing to create in that case
+    if d and not os.path.exists(d): os.makedirs(d)
+    if os.path.exists(self.path): self.backup()
+
+    mode = 'wb' if self.binary else 'wt'
+    with open(self.path, mode) as f: f.write(contents)
+
+  def remove(self):
+    """Deletes the file, its backup and the parent directory (if empty)"""
+    safe_rmfile(self.path)
+    safe_rmfile(self.path + '~') #backup
+    safe_rmdir(self.path) #remove containing directory
+
+
+
+class Storage(object):
+  """Resolves paths for objects that provide only a description
+
+  Parameters:
+
+    path (str): Path to the object on the disk, without extension (``.json``
+      and ``.rst`` are appended for the declaration and the documentation).
+
+  """
+
+  def __init__(self, path):
+    self.path = path
+    self.json = File(self.path + '.json')
+    self.doc = File(self.path + '.rst')
+
+  def hash(self, description='description'):
+    """The hash of the JSON declaration, ignoring the ``description`` key"""
+    return hash.hashJSONFile(self.json.path, description)
+
+  def exists(self):
+    """If the JSON declaration file exists"""
+    return self.json.exists()
+
+  def load(self):
+    """Loads the raw ``(declaration, description)`` file contents"""
+    tp = collections.namedtuple('Storage', ['declaration', 'description'])
+    return tp(self.json.load(), self.doc.try_load())
+
+  def save(self, declaration, description=None):
+    """Saves the JSON declaration (and the description, if any) as files"""
+    if description:
+      # the doc file is opened in text mode: only python2 wants UTF-8 bytes
+      if six.PY2: description = description.encode('utf8')
+      self.doc.save(description)
+    if not isinstance(declaration, six.string_types):
+      declaration = simplejson.dumps(declaration, indent=4)
+    self.json.save(declaration)
+
+  def remove(self):
+    """Removes the object from the disk"""
+    self.json.remove()
+    self.doc.remove()
+
+
+
+class CodeStorage(Storage):
+  """Resolves paths for objects that provide a description and code
+
+  Parameters:
+
+    path (str): Path to the object on the disk, without extension (``.json``
+      and ``.rst`` are appended for the declaration and the documentation).
+
+    language (str): One of the valid programming languages (see
+      :py:func:`extension_for_language`). If not set, it is read from the
+      JSON declaration, or ``unknown`` is used when that file can't be read.
+
+  """
+
+  def __init__(self, path, language=None):
+
+    self.path = path
+    self.json = File(self.path + '.json')
+    self.doc = File(self.path + '.rst')
+
+    self._language = language or self.__auto_discover_language()
+    self.code = File(self.path + \
+            extension_for_language(self._language), binary=True)
+
+  def __auto_discover_language(self, json=None):
+    """Discovers the language from a declaration or our own JSON descriptor
+
+    ``json`` may be JSON text or an already decoded dictionary; if it is not
+    given, the JSON file of this object is read instead.
+    """
+    try:
+      declaration = json or self.json.load()
+      if not isinstance(declaration, dict):
+        declaration = simplejson.loads(declaration)
+      return declaration['language']
+    except IOError:
+      return 'unknown'
+
+  @property
+  def language(self):
+    """The programming language the code is written in"""
+    return self._language
+
+  @language.setter
+  def language(self, value):
+    # the path of the code file depends on the language: keep it in sync
+    self._language = value
+    self.code = File(self.path + extension_for_language(value), binary=True)
+
+  def hash(self):
+    """The hash of the JSON declaration combined with the code (if any)"""
+    parts = dict(json=hash.hashJSONFile(self.json.path, 'description'))
+    if self.code.exists():
+      parts['code'] = hash.hashFileContents(self.code.path)
+    return hash.hash(parts)
+
+  def exists(self):
+    """If both the JSON declaration and the code files exist"""
+    return self.json.exists() and self.code.exists()
+
+  def load(self):
+    """Loads the raw ``(declaration, code, description)`` file contents"""
+    tp = collections.namedtuple('CodeStorage',
+            ['declaration', 'code', 'description'])
+    return tp(self.json.load(), self.code.try_load(), self.doc.try_load())
+
+  def save(self, declaration, code=None, description=None):
+    """Saves the JSON declaration and the code as files"""
+    super(CodeStorage, self).save(declaration, description)
+    if code:
+      if self._language == 'unknown':
+        self.language = self.__auto_discover_language(declaration)
+      self.code.save(code)
+
+  def remove(self):
+    """Removes the object from the disk"""
+    super(CodeStorage, self).remove()
+    self.code.remove()
-- 
GitLab