Commit 73491015 authored by Amir MOHAMMADI

Integrate deeper with config and prefix

parent 777396f1
Pipeline #41900 failed in 90 minutes and 3 seconds
......@@ -22,7 +22,6 @@ repos:
- id: debug-statements
exclude: beat/backend/python/test/prefix/.*/syntax_error
- id: check-added-large-files
- id: check-docstring-first
- id: flake8
exclude: beat/backend/python/test/prefix/.*/(.*crash|syntax_error)
- id: check-yaml
......
......@@ -1056,3 +1056,7 @@ class Algorithm(object):
k.export(prefix)
self.write(Storage(prefix, self.name, self.language))
class Analyzer(Algorithm):
"""docstring for Analyzer"""
......@@ -123,6 +123,8 @@ def setup_scalar(formatname, attrname, dtype, value, casting, add_defaults):
return str(value)
else: # it is a dataformat
if isinstance(value, baseformat):
return value
return dtype().from_dict(value, casting=casting, add_defaults=add_defaults)
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###################################################################################
# #
# Copyright (c) 2020 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# Redistribution and use in source and binary forms, with or without #
# modification, are permitted provided that the following conditions are met: #
# #
# 1. Redistributions of source code must retain the above copyright notice, this #
# list of conditions and the following disclaimer. #
# #
# 2. Redistributions in binary form must reproduce the above copyright notice, #
# this list of conditions and the following disclaimer in the documentation #
# and/or other materials provided with the distribution. #
# #
# 3. Neither the name of the copyright holder nor the names of its contributors #
# may be used to endorse or promote products derived from this software without #
# specific prior written permission. #
# #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
# #
###################################################################################
"""Global Configuration state and management"""
import getpass
import logging
import os
from contextlib import contextmanager
logger = logging.getLogger(__name__)
DEFAULTS = {
"user": getpass.getuser(),
"prefix": os.path.realpath(os.path.join(os.curdir, "prefix")),
"cache": "cache",
}
"""Default values of the configuration"""
DOC = {
"user": "User name for operations that create, delete or edit objects",
"prefix": "Directory containing BEAT objects",
"cache": "Directory to use for data caching (relative to prefix)",
}
"""Documentation for configuration parameters"""
_global_config = DEFAULTS.copy()
def get_config():
"""Retrieve current values for configuration set by :func:`set_config`
Returns
-------
config : dict
Keys are parameter names that can be passed to :func:`set_config`.
See Also
--------
config_context: Context manager for global configuration
set_config: Set global configuration
"""
return _global_config.copy()
def set_config(**kwargs):
"""Set global configuration
Parameters
----------
user : str, optional
The username used when creating objects
prefix : str, optional
The path to the current prefix
cache : str, optional
The path to the current cache
See Also
--------
config_context: Context manager for global configuration
get_config: Retrieve current values of the global configuration
"""
supported_keys = set(DEFAULTS.keys())
set_keys = set(kwargs.keys())
if not set_keys.issubset(supported_keys):
raise ValueError(
f"Only {supported_keys} are valid configurations. "
f"Got these extra values: {set_keys - supported_keys}"
)
kwargs = {k: v for k, v in kwargs.items() if v is not None}
_global_config.update(kwargs)
# if a new prefix path is set, clear the cached prefix assets
if "prefix" in kwargs:
Prefix().clear()
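For reference, a minimal usage sketch of the accessors above (illustrative only; the path and user name are hypothetical, and the import assumes this new module is reachable as beat.backend.python.config):
from beat.backend.python import config
config.set_config(prefix="/tmp/my_prefix", user="jdoe")
assert config.get_config()["user"] == "jdoe"
config.set_config(bogus=1)  # raises ValueError: only user/prefix/cache are valid keys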
@contextmanager
def config_context(**new_config):
"""Context manager for global configuration
Parameters
----------
user : str, optional
The username used when creating objects
prefix : str, optional
The path to the current prefix
cache : str, optional
The path to the current cache
Notes
-----
All settings, not just those presently modified, will be returned to
their previous values when the context manager is exited. This is not
thread-safe.
See Also
--------
set_config: Set global configuration
get_config: Retrieve current values of the global configuration
"""
old_config = get_config().copy()
# also back up the prefix asset cache
old_prefix = Prefix().copy()
set_config(**new_config)
try:
yield
finally:
set_config(**old_config)
prefix = Prefix()
prefix.clear()
prefix.update(old_prefix)
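A short sketch of the context manager in use (illustrative only; the path is hypothetical). Both the configuration values and the Prefix() asset cache are restored when the block exits:
with config_context(prefix="/tmp/other_prefix"):
    print(get_config()["prefix"])  # the temporary prefix; assets load against it
print(get_config()["prefix"])  # the previous prefix and its cached assets are back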
# ----------------------------------------------------------
class Singleton(type):
"""A Singleton metaclass
The singleton class creates the instance on the first request and returns the cached instance afterwards; __init__ is not called again.
From: https://stackoverflow.com/a/6798042/1286165
"""
_instances = {}
def __call__(cls, *args, **kwargs):
if cls not in cls._instances:
cls._instances[cls] = super().__call__(*args, **kwargs)
return cls._instances[cls]
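A small illustration of the metaclass (not from this commit; Counter is a hypothetical class): repeated calls return the same instance, and __init__ only runs once.
class Counter(metaclass=Singleton):
    def __init__(self):
        self.value = 0
a, b = Counter(), Counter()
a.value += 1
assert b.value == 1  # a and b are the same object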
class Prefix(dict, metaclass=Singleton):
def __init__(self, path=None, *args, **kwargs):
super().__init__(*args, **kwargs)
class PrefixMeta(type):
def __contains__(cls, key):
return f"{cls.asset_folder}/{key}" in Prefix()
def __getitem__(cls, key):
folder = f"{cls.asset_folder}/{key}"
prefix = Prefix()
# if cached, return it
if folder in prefix:
return prefix[folder]
# otherwise, load from prefix
asset = super().__call__(key)
return asset
def __setitem__(cls, key, value):
folder = f"{cls.asset_folder}/{key}"
prefix = Prefix()
prefix[folder] = value
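To make the caching protocol concrete, here is an illustrative sketch (Example and the asset name are hypothetical, not part of this commit). Any class that uses PrefixMeta and defines asset_folder gets dict-style lookups backed by the Prefix() singleton, keyed as "<asset_folder>/<name>":
class Example(metaclass=PrefixMeta):
    asset_folder = "examples"

    def __init__(self, name):
        self.name = name
        Example[name] = self  # stored in Prefix() under "examples/<name>"

first = Example["jdoe/thing/1"]    # not cached yet: instantiated, then cached
assert "jdoe/thing/1" in Example   # later lookups hit the Prefix() cache
assert Example["jdoe/thing/1"] is first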
......@@ -42,6 +42,7 @@ dataformat
Validation and parsing for dataformats
"""
import collections
import copy
import re
......@@ -49,9 +50,13 @@ import numpy
import simplejson as json
import six
from . import config
from . import utils
from .baseformat import baseformat
DATA_FORMAT_TYPE = "dataformat"
DATA_FORMAT_FOLDER = "dataformats"
# ----------------------------------------------------------
......@@ -60,28 +65,24 @@ class Storage(utils.Storage):
Parameters:
prefix (str): Establishes the prefix of your installation.
name (str): The name of the dataformat object in the format
``<user>/<name>/<version>``.
"""
asset_type = "dataformat"
asset_folder = "dataformats"
asset_type = DATA_FORMAT_TYPE
asset_folder = DATA_FORMAT_FOLDER
def __init__(self, prefix, name):
def __init__(self, name):
if name.count("/") != 2:
raise RuntimeError("invalid dataformat name: `%s'" % name)
self.username, self.name, self.version = name.split("/")
self.fullname = name
self.prefix = prefix
prefix = config.get_config()["prefix"]
path = utils.hashed_or_simple(
self.prefix, self.asset_folder, name, suffix=".json"
)
path = utils.hashed_or_simple(prefix, self.asset_folder, name, suffix=".json")
path = path[:-5]
super(Storage, self).__init__(path)
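An illustrative note on the change above (the name and path are hypothetical): Storage no longer takes a prefix argument and reads it from the global configuration instead, so the same name resolves under whichever prefix is currently configured.
config.set_config(prefix="/tmp/my_prefix")
storage = Storage("jdoe/my_format/1")
# for a fresh prefix this typically resolves under
# /tmp/my_prefix/dataformats/jdoe/my_format/1.json (plus its sidecar files)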
......@@ -93,7 +94,7 @@ class Storage(utils.Storage):
# ----------------------------------------------------------
class DataFormat(object):
class DataFormat(metaclass=config.PrefixMeta):
"""Data formats define the chunks of data that circulate between blocks.
Parameters:
......@@ -111,13 +112,6 @@ class DataFormat(object):
object that is this object's parent and the name of the field on that
object that points to this one.
dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
dataformat names to loaded dataformats. This parameter is optional and,
if passed, may greatly speed-up data format loading times as
dataformats that are already loaded may be re-used. If you use this
parameter, you must guarantee that the cache is refreshed as
appropriate in case the underlying dataformats change.
Attributes:
name (str): The full, valid name of this dataformat
......@@ -147,28 +141,28 @@ class DataFormat(object):
"""
def __init__(self, prefix, data, parent=None, dataformat_cache=None):
asset_type = DATA_FORMAT_TYPE
asset_folder = DATA_FORMAT_FOLDER
def _init(self):
self._name = None
self.storage = None
self.resolved = None
self.prefix = prefix
self.errors = []
self.data = None
self.resolved = None
self.referenced = {}
self.parent = parent
self.parent = None
# if the user has not provided a cache, still use one for performance
dataformat_cache = dataformat_cache if dataformat_cache is not None else {}
def __init__(self, data, parent=None):
try:
self._load(data, dataformat_cache)
finally:
if self._name is not None: # registers it into the cache, even if failed
dataformat_cache[self._name] = self
self._init()
self.parent = parent
self._load(data)
# cache in prefix
DataFormat[self.name] = self
def _load(self, data, dataformat_cache):
def _load(self, data):
"""Loads the dataformat"""
if isinstance(data, dict):
......@@ -176,7 +170,7 @@ class DataFormat(object):
self.data = data
else:
self._name = data
self.storage = Storage(self.prefix, data)
self.storage = Storage(data)
json_path = self.storage.json.path
if not self.storage.exists():
self.errors.append(
......@@ -196,7 +190,9 @@ class DataFormat(object):
)
return
dataformat_cache[self._name] = self # registers itself into the cache
self._resolve()
def _resolve(self):
self.resolved = copy.deepcopy(self.data)
......@@ -212,31 +208,29 @@ class DataFormat(object):
if is_reserved(key):
del self.resolved[key]
def maybe_load_format(name, obj, dataformat_cache):
def maybe_load_format(name, obj):
"""Tries to load a given dataformat from its relative path"""
if isinstance(obj, six.string_types) and obj.find("/") != -1: # load it
if obj in dataformat_cache: # reuse
if obj in DataFormat: # reuse
if dataformat_cache[obj] is None: # recursion detected
if DataFormat[obj] is None: # recursion detected
return self
self.referenced[obj] = dataformat_cache[obj]
self.referenced[obj] = DataFormat[obj]
else: # load it
self.referenced[obj] = DataFormat(
self.prefix, obj, (self, name), dataformat_cache
)
self.referenced[obj] = DataFormat(obj, (self, name))
return self.referenced[obj]
elif isinstance(obj, dict): # can cache it, must load from scratch
return DataFormat(self.prefix, obj, (self, name), dataformat_cache)
return DataFormat(obj, (self, name))
elif isinstance(obj, list):
retval = copy.deepcopy(obj)
retval[-1] = maybe_load_format(field, obj[-1], dataformat_cache)
retval[-1] = maybe_load_format(field, obj[-1])
return retval
return obj
......@@ -245,7 +239,7 @@ class DataFormat(object):
for field, value in self.data.items():
if field in ("#description", "#schema_version"):
continue # skip the description and schema version meta attributes
self.resolved[field] = maybe_load_format(field, value, dataformat_cache)
self.resolved[field] = maybe_load_format(field, value)
# at this point, there should be no more external references in
# ``self.resolved``. We treat the "#extends" property, which requires a
......@@ -253,13 +247,77 @@ class DataFormat(object):
if "#extends" in self.resolved:
ext = self.data["#extends"]
self.referenced[ext] = maybe_load_format(self._name, ext, dataformat_cache)
self.referenced[ext] = maybe_load_format(self._name, ext)
basetype = self.resolved["#extends"]
tmp = self.resolved
self.resolved = basetype.resolved
self.resolved.update(tmp)
del self.resolved["#extends"] # avoids infinite recursion
@classmethod
def new(
cls,
definition,
name,
description=None,
extends=None,
schema_version=None,
parent=None,
):
self = cls.__new__(cls)
self._init()
def str_or_dtype_or_type(v):
# if it is a dict
if isinstance(v, collections.abc.Mapping):
return {k: str_or_dtype_or_type(v_) for k, v_ in v.items()}
# if it's a list (but not a string), convert only the last element (the type)
if isinstance(v, collections.abc.Sequence) and not isinstance(v, str):
return [v_ for v_ in v[:-1]] + [str_or_dtype_or_type(v[-1])]
# if it is another dataformat or a numpy.dtype
elif hasattr(v, "name"):
v = v.name
# if it is a str
elif v is str:
v = "string"
# if none of the above, convert to a numpy dtype and then to its name
else:
v = numpy.dtype(v).name
return v
data = str_or_dtype_or_type(definition)
if description is not None:
data["#description"] = description
if extends is not None:
data["#extends"] = extends.name
if schema_version is not None:
data["#schema_version"] = schema_version
self.data = data
if not name:
raise ValueError(f"Invalid {name}. The name should be a non-empty string!")
if name != "analysis:result" and "/" not in name:
name = f"{config.get_config()['user']}/{name}/1"
self._name = name
self.parent = parent
if name != "analysis:result":
self.storage = Storage(name)
self._resolve()
# cache in prefix
DataFormat[self.name] = self
return self
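A hedged usage sketch for the constructor above (the field names and format name are hypothetical; a prefix and user are assumed to be configured). Python and NumPy types are converted to BEAT type names, and a short name is expanded to "<user>/<name>/1":
import numpy

df = DataFormat.new(
    {"label": str, "value": numpy.dtype("int32")},
    name="my_format",
    description="a two-field format",
)
# df.name -> "<configured user>/my_format/1"; df is now cached in the prefix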
@property
def name(self):
"""Returns the name of this object, either from the filename or composed
......@@ -273,7 +331,7 @@ class DataFormat(object):
@name.setter
def name(self, value):
self._name = value
self.storage = Storage(self.prefix, value)
self.storage = Storage(value)
@property
def schema_version(self):
......@@ -426,7 +484,7 @@ class DataFormat(object):
obj = self.type()
if isinstance(data, dict):
obj.from_dict(data, casting="safe", add_defaults=False)
elif isinstance(data, six.string_types):
elif isinstance(data, bytes):
obj.unpack(data)
else:
obj.unpack_from(data)
......@@ -510,7 +568,7 @@ class DataFormat(object):
Raises:
RuntimeError: If prefix and self.prefix point to the same directory.
RuntimeError: If the given prefix and the configured prefix point to the same directory.
"""
......@@ -520,7 +578,7 @@ class DataFormat(object):
if not self.valid:
raise RuntimeError("dataformat is not valid:\n{}".format(self.errors))
if prefix == self.prefix:
if prefix == config.get_config()["prefix"]:
raise RuntimeError(
"Cannot export dataformat to the same prefix (" "%s)" % prefix
)
......
......@@ -44,47 +44,45 @@ Validation of database protocol templates
import simplejson as json
from . import config
from . import utils
from .dataformat import DataFormat
# ----------------------------------------------------------
PROTOCOL_TEMPLATE_TYPE = "protocoltemplate"
PROTOCOL_TEMPLATE_FOLDER = "protocoltemplates"
# ----------------------------------------------------------
class Storage(utils.Storage):
"""Resolves paths for protocol templates
Parameters:
prefix (str): Establishes the prefix of your installation.
name (str): The name of the protocol template object in the format
``<name>/<version>``.
"""
asset_type = "protocoltemplate"
asset_folder = "protocoltemplates"
asset_type = PROTOCOL_TEMPLATE_TYPE
asset_folder = PROTOCOL_TEMPLATE_FOLDER
def __init__(self, prefix, name):
def __init__(self, name):
if name.count("/") != 1:
raise RuntimeError("invalid protocol template name: `%s'" % name)
self.name, self.version = name.split("/")
self.fullname = name
self.prefix = prefix
path = utils.hashed_or_simple(
self.prefix, self.asset_folder, name, suffix=".json"
config.get_config()["prefix"], self.asset_folder, name, suffix=".json"
)
path = path[:-5]
super(Storage, self).__init__(path)
# ----------------------------------------------------------
class ProtocolTemplate(object):
class ProtocolTemplate(metaclass=config.PrefixMeta):
"""Protocol template define the design of the database.
......@@ -94,14 +92,6 @@ class ProtocolTemplate(object):
name (str): The fully qualified protocol template name (e.g. ``db/1``)
dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
dataformat names to loaded dataformats. This parameter is optional and,
if passed, may greatly speed-up database loading times as dataformats
that are already loaded may be re-used. If you use this parameter, you
must guarantee that the cache is refreshed as appropriate in case the
underlying dataformats change.
Attributes:
name (str): The full, valid name of this database
......@@ -111,27 +101,83 @@ class ProtocolTemplate(object):
"""
def __init__(self, prefix, name, dataformat_cache=None):
asset_type = PROTOCOL_TEMPLATE_TYPE
asset_folder = PROTOCOL_TEMPLATE_FOLDER
def _init(self):
self._name = None
self.prefix = prefix
self.dataformats = {} # preloaded dataformats
self.storage = None
self.errors = []
self.data = None
# if the user has not provided a cache, still use one for performance
dataformat_cache = dataformat_cache if dataformat_cache is not None else {}
def __init__(self, name):
self._init()
self._load(name)
# cache in prefix
ProtocolTemplate[self.name] = self
@classmethod
def new(
cls, sets, name, description=None, schema_version=None,
):
self = cls.__new__(cls)
self._init()
if not name:
raise ValueError(f"Invalid {name}. The name should be a non-empty string!")
if "/" not in name:
name = f"{name}/1"
self._name = name
def dataformat_name(v):
if hasattr(v, "name"):
v = v.name
return v
self._load(name, dataformat_cache)
for i, set_ in enumerate(sets):
sets[i]["outputs"] = {
k: dataformat_name(v) for k, v in set_["outputs"].items()
}
def _load(self, data, dataformat_cache):
data = dict(sets=sets)
if description is not None:
data["description"] = description
if schema_version is not None:
data["schema_version"] = schema_version
self.data = data
self.storage = Storage(name)
self._resolve()
# cache in prefix
ProtocolTemplate[self.name] = self
return self
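A hedged usage sketch for the constructor above (the set layout and names are hypothetical, and the referenced dataformat is assumed to exist under the configured prefix). Outputs may be given as DataFormat objects or names; they are normalised to names before the template is resolved:
pt = ProtocolTemplate.new(
    sets=[{"name": "train", "outputs": {"label": "system/integer/1"}}],
    name="my_db",
)
# pt.name -> "my_db/1"; output dataformats are loaded through DataFormat[...]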
def _resolve(self):
for set_ in self.data["sets"]:
for key, value in set_["outputs"].items():
if value in self.dataformats:
continue
dataformat = DataFormat[value]
self.dataformats[value] = dataformat
def _load(self, data):
"""Loads the protocol template"""
self._name = data
self.storage = Storage(self.prefix, self._name)
self.storage = Storage(self._name)
json_path = self.storage.json.path
if not self.storage.json.exists():
self.errors.append(
......@@ -149,21 +195,7 @@ class ProtocolTemplate(object):
"Protocol template declaration file invalid: %s" % error
)
return
for set_ in self.data["sets"]:
for key, value in set_["outputs"].items():
if value in self.dataformats:
continue
if value in dataformat_cache:
dataformat = dataformat_cache[value]
else:
dataformat = DataFormat(self.prefix, value)
dataformat_cache[value] = dataformat
self.dataformats[value] = dataformat
self._resolve()
@property
def name(self):
......@@ -174,7 +206,7 @@ class ProtocolTemplate(object):
@name.setter
def name(self, value):
self._name = value
self.storage = Storage(self.prefix, value)
self.storage = Storage(value)
@property
def description(self):
......@@ -285,7 +317,7 @@ class ProtocolTemplate(object):
Raises:
RuntimeError: If prefix and self.prefix point to the same directory.