Commit 2bc5215d authored by André Anjos's avatar André Anjos 💬

Merge branch '64_protocol_template' into 'master'

Protocol template

See merge request !67
parents 18c882ae d1b03a6c
Pipeline #29377 passed with stages
in 26 minutes and 47 seconds
......@@ -102,6 +102,15 @@ class Database(BackendDatabase):
def __init__(self, prefix, data, dataformat_cache=None):
    """Builds the database by delegating to the parent class constructor.

    ``prefix``, ``data`` and ``dataformat_cache`` are forwarded unchanged;
    no extra work is done here.
    """
    super(Database, self).__init__(prefix, data, dataformat_cache)
def _validate_view(self, view_name):
    """Registers an error when a view name is not a plain, root-level name.

    A name is rejected as soon as it contains a dot or the platform path
    separator (``os.sep``). Nothing is returned: the outcome is only visible
    through ``self.errors``, which receives one message per rejected name.

    Parameters:

      view_name (str): The view name to check.
    """
    has_dot = "." in view_name
    has_separator = os.sep in view_name
    if not (has_dot or has_separator):
        return

    message = (
        "dataset views are required to sit inside the "
        "database root folder, but `%s' is either in a "
        "subdirectory or points to a python module, what is "
        "unsupported by this version" % (view_name)
    )
    self.errors.append(message)
def _load(self, data, dataformat_cache):
"""Loads the database"""
......@@ -150,7 +159,7 @@ class Database(BackendDatabase):
self._validate_semantics(dataformat_cache)
def _validate_semantics(self, dataformat_cache):
"""Validates all sematical aspects of the database"""
"""Validates all semantical aspects of the database"""
# all protocol names must be unique
protocol_names = [k["name"] for k in self.data["protocols"]]
......@@ -161,7 +170,7 @@ class Database(BackendDatabase):
# all set names within a protocol must be unique
for protocol in self.data["protocols"]:
set_names = [k["name"] for k in protocol["sets"]]
set_names = self.set_names(protocol["name"])
if len(set_names) != len(set(set_names)):
self.errors.append(
"found different sets with the same name at protocol "
......@@ -169,9 +178,9 @@ class Database(BackendDatabase):
)
# all outputs must have valid data types
for _set in protocol["sets"]:
for _, set_ in self.sets(protocol["name"]).items():
for key, value in _set["outputs"].items():
for key, value in set_["outputs"].items():
if value in self.dataformats:
continue
......@@ -191,17 +200,16 @@ class Database(BackendDatabase):
% (
value,
key,
_set["name"],
set_["name"],
protocol["name"],
"\n".join(dataformat.errors),
)
)
# all view names must be relative to the database root path
if _set["view"].find(".") != -1 or _set["view"].find(os.sep) != -1:
self.errors.append(
"dataset views are required to sit inside the "
"database root folder, but `%s' is either in a "
"subdirectory or points to a python module, what is "
"unsupported by this version" % (_set["view"],)
)
if self.schema_version == 1:
self._validate_view(set_["view"])
if self.schema_version != 1:
for view in protocol["views"].keys():
self._validate_view(view)
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###################################################################################
# #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# Redistribution and use in source and binary forms, with or without #
# modification, are permitted provided that the following conditions are met: #
# #
# 1. Redistributions of source code must retain the above copyright notice, this #
# list of conditions and the following disclaimer. #
# #
# 2. Redistributions in binary form must reproduce the above copyright notice, #
# this list of conditions and the following disclaimer in the documentation #
# and/or other materials provided with the distribution. #
# #
# 3. Neither the name of the copyright holder nor the names of its contributors #
# may be used to endorse or promote products derived from this software without #
# specific prior written permission. #
# #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
# #
###################################################################################
"""
================
protocoltemplate
================
Validation of protocoltemplate
Forward importing from :py:mod:`beat.backend.python.protocoltemplate`:
:py:class:`beat.backend.python.protocoltemplate.Storage`
"""
import six
from . import schema
from beat.backend.python.protocoltemplate import Storage
from beat.backend.python.protocoltemplate import (
ProtocolTemplate as BackendProtocolTemplate,
)
class ProtocolTemplate(BackendProtocolTemplate):
    """Validating counterpart of the backend protocol template.

    A protocol template defines the design of a database.

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (dict, str): The piece of data representing the protocol template.
        It must validate against the schema defined for protocol templates.
        When a string is given, it is taken as the path to a protocol
        template inside the designated prefix area.

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. It is optional and, when
        given, may greatly speed-up loading times because dataformats that
        are already loaded may be re-used. If you use it, you must guarantee
        the cache is refreshed as appropriate in case the underlying
        dataformats change.

    Attributes:

      name (str): The full, valid name of this protocol template

      description (str): The short description string, loaded from the JSON
        file if one was set.

      documentation (str): The full-length docstring for this object.

      storage (object): A simple object that provides information about file
        paths for this protocol template

      errors (list): A list containing errors found while loading this
        protocol template.

      data (dict): The original data for this protocol template, as loaded by
        our JSON decoder.
    """

    def __init__(self, prefix, data, dataformat_cache=None):
        super(ProtocolTemplate, self).__init__(prefix, data, dataformat_cache)

    def _load(self, data, dataformat_cache):
        """Loads the protocol template declaration and validates it

        When ``data`` is a string, the declaration file is located through a
        ``Storage`` object; a missing file is reported in ``self.errors`` and
        loading stops there. Otherwise (or once the file is located) the
        declaration goes through ``schema.validate``, whose results become
        ``self.data`` and ``self.errors``.
        """

        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats

        given_by_name = isinstance(data, six.string_types)
        if given_by_name:
            # a string designates a protocol template stored in the prefix
            self._name = data
            self.storage = Storage(self.prefix, self._name)
            data = self.storage.json.path
            declaration_missing = not self.storage.json.exists()
            if declaration_missing:
                self.errors.append(
                    "Protocol template declaration file not found: %s" % data
                )
                return

        # basic validation, which also takes care of JSON loading if required
        validated, problems = schema.validate("protocoltemplate", data)
        self.data = validated
        self.errors = problems
        # no further validation step follows, whether or not errors were found
......@@ -94,7 +94,11 @@ def load_schema(schema_name, version=1):
with open(fname, "rb") as f:
data = f.read().decode()
schema = json.loads(data)
try:
    schema = json.loads(data)
except json.JSONDecodeError:
    # ``json.errors`` does not exist: the decoding error class is exposed as
    # ``json.JSONDecodeError``. The payload must be passed by keyword because
    # the template uses the named field ``{data}``.
    # Show the offending payload, then let the original error propagate.
    print("Invalid json:\n {data}".format(data=data))
    raise
basedir = os.path.realpath(os.path.dirname(fname))
resolver = jsonschema.RefResolver("file://" + basedir + "/", schema)
......
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Database descriptor v2",
"description": "This schema defines the properties of a version 2 BEAT database",
"type": "object",
"properties": {
"root_folder": {
"type": "string",
"pattern": "^((file://)?(/[^/]+)+|nfs://[a-z0-9._-]+:(/[^/]+)+)$"
},
"protocols": {
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": { "$ref": "#/definitions/protocol" }
},
"description": { "$ref": "../common/1.json#/definitions/description" },
"schema_version": { "const": 2 }
},
"required": [
"root_folder",
"protocols",
"schema_version"
],
"additionalProperties": false,
"definitions": {
"template_identifier": {
"type": "string",
"pattern": "^[a-zA-Z0-9_-]+/[0-9]+$"
},
"protocol": {
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/protocol_name" },
"template": { "$ref": "#/definitions/template_identifier" },
"views": { "$ref": "#/definitions/views" }
},
"required": ["name", "views", "template"],
"additionalProperties": false
},
"protocol_name": {
"type": "string",
"pattern": "^[a-zA-Z0-9_][\\.a-zA-Z0-9_-]*$"
},
"view_name": {
"type": "string",
"pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$"
},
"views": {
"type": "object",
"minProperties": 1,
"uniqueItems": true,
"patternProperties": {
"^[a-zA-Z_][a-zA-Z0-9_]*$": { "$ref": "#/definitions/view" }
}
},
"view": {
"type": "object",
"properties": {
"view": { "$ref": "#/definitions/view_name" },
"parameters": { "$ref": "#/definitions/parameters" }
},
"additionalProperties": false
},
"parameters": {
"type": "object",
"patternProperties": {
"^[a-zA-Z_][a-zA-Z0-9_-]*$": {
"$ref": "#/definitions/parameter_value"
}
}
},
"parameter_value": {
"oneOf": [
{
"type": "string"
},
{
"type": "number"
},
{
"type": "boolean"
}
]
}
}
}
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Database Protocol descriptor",
"description": "This schema defines the properties of a BEAT database protocol",
"type": "object",
"properties": {
"description": { "$ref": "../common/1.json#/definitions/description" },
"schema_version": { "const": 1 },
"sets": {
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": { "$ref": "#/definitions/set" }
}
},
"required": [
"schema_version", "sets"
],
"additionalProperties": false,
"definitions": {
"identifier": {
"type": "string",
"pattern": "^[a-zA-Z_][a-zA-Z0-9_-]*$"
},
"set": {
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/identifier" },
"outputs": {
"type": "object",
"patternProperties": {
"^[a-zA-Z_][a-zA-Z0-9_-]*$": {
"$ref": "../common/1.json#/definitions/reference"
}
},
"minProperties": 1,
"uniqueItems": true,
"additionalProperties": false
}
},
"required": ["name", "outputs"],
"additionalProperties": false
}
}
}
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###################################################################################
# #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# Redistribution and use in source and binary forms, with or without #
# modification, are permitted provided that the following conditions are met: #
# #
# 1. Redistributions of source code must retain the above copyright notice, this #
# list of conditions and the following disclaimer. #
# #
# 2. Redistributions in binary form must reproduce the above copyright notice, #
# this list of conditions and the following disclaimer in the documentation #
# and/or other materials provided with the distribution. #
# #
# 3. Neither the name of the copyright holder nor the names of its contributors #
# may be used to endorse or promote products derived from this software without #
# specific prior written permission. #
# #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
# #
###################################################################################
"""Migrate a v1 database to v2
Usage:
%(prog)s [-v ... | --verbose ...] [--prefix=<path>][-f|--force]
<database_identifier>
%(prog)s (--help | -h)
%(prog)s (--version | -V)
Options:
-h, --help Show this screen
-V, --version Show version
-v, --verbose Increases the output verbosity level
-p, --prefix=<path> Path where the prefix is contained [default: .]
"""
import os
import sys
import copy
from docopt import docopt
from ..version import __version__
from ..database import Database, Storage as DBStorage
from ..protocoltemplate import ProtocolTemplate, Storage as PTStorage
from ..utils import setup_logging
def main(argv=None):
    """Migrates a version 1 database declaration to version 2.

    The database named on the command line is loaded and must be a valid v1
    database. For each of its protocols, the sets (stripped of their
    ``template``, ``view`` and ``parameters`` entries) are turned into a
    protocol template named ``<protocol>/1``, which is written unless it
    already exists. The view and parameters of every set are moved into the
    ``views`` entry of the protocol in the new database, which is written
    under the same name with its version number incremented by one.

    Parameters:

      argv (list, Optional): Command-line arguments to parse. When ``None``,
        ``sys.argv[1:]`` is used.

    Returns:

      int: ``1`` when the migration cannot be performed (missing prefix,
      invalid or non-v1 source database, target database already present,
      invalid generated protocol template or database). Nothing is returned
      explicitly on success.
    """
    if argv is None:
        argv = sys.argv[1:]

    prog = os.path.basename(sys.argv[0])
    completions = dict(prog=prog, version=__version__)
    args = docopt(
        __doc__ % completions,
        argv=argv,
        options_first=True,
        version="v%s" % __version__,
    )

    logger = setup_logging(args["--verbose"], __name__, __name__)

    prefix = args["--prefix"] if args["--prefix"] is not None else "."
    if not os.path.exists(prefix):
        logger.error("Prefix not found at: '%s'", prefix)
        return 1

    database_identifier = args["<database_identifier>"]
    database = Database(prefix, database_identifier)
    if not database.valid:
        logger.error("Invalid database: '%s'", "\n".join(database.errors))
        return 1

    if database.schema_version != 1:
        logger.error("Can't migrate database is not v1")
        return 1

    # the migrated database keeps its name and gets the next version number
    db_name, db_version = database_identifier.split("/")
    new_db_name = f"{db_name}/{int(db_version) + 1}"
    db_storage = DBStorage(prefix, new_db_name)
    if db_storage.exists():
        logger.error(f"Database already exists: {new_db_name}")
        return 1

    database_json = copy.deepcopy(database.data)
    database_json["schema_version"] = 2
    database_json["protocols"] = []

    # entries that move from the set declaration to the database "views"
    moved_keys = ("template", "view", "parameters")

    for protocol in database.protocols:
        set_list = []
        views = {}
        for set_ in database.sets(protocol).values():
            views[set_["name"]] = {
                "view": set_["view"],
                "parameters": set_.get("parameters", {}),
            }
            # build a stripped copy rather than popping keys in place, so the
            # dictionaries handed out by the source database stay untouched
            set_list.append(
                {key: value for key, value in set_.items() if key not in moved_keys}
            )

        template = {"schema_version": 1, "sets": set_list}
        pt_name = f"{protocol}/1"
        pt_storage = PTStorage(prefix, pt_name)
        if pt_storage.exists():
            # an existing template is re-used as is
            logger.info(f"Protocol template already exists: {pt_name}")
        else:
            protocol_template = ProtocolTemplate(prefix, template)
            if not protocol_template.valid:
                # "%s" is required: logging interpolates extra arguments
                # into the message, it does not append them
                logger.error(
                    "Invalid protocol created: %s",
                    "\n".join(protocol_template.errors),
                )
                return 1
            protocol_template.write(pt_storage)

        protocol_entry = {"name": protocol, "template": pt_name, "views": views}
        database_json["protocols"].append(protocol_entry)

    new_database = Database(prefix, database_json)
    if not new_database.valid:
        logger.error(
            "Invalid database created: %s", "\n".join(new_database.errors)
        )
        return 1

    # carry over what is not part of the JSON declaration
    new_database.code = database.code
    new_database.description = (
        database.description if database.description is not None else ""
    )
    new_database.write(db_storage)
if __name__ == "__main__":
    # propagate the status returned by main() so failures yield a non-zero
    # exit code; a ``None`` return still exits with status 0
    sys.exit(main())
{
"root_folder": "/tmp/path/not/set",
"protocols": [
{
"name": "large",
"template": "large/1",
"views": {
"data": {
"view": "LargeView",
"parameters": {}
}
}
},
{
"name": "small",
"template": "small/1",
"views": {
"data": {
"view": "SmallView",
"parameters": {}
}
}
}
],
"schema_version": 2,
"description": ""
}
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###################################################################################
# #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# Redistribution and use in source and binary forms, with or without #
# modification, are permitted provided that the following conditions are met: #
# #
# 1. Redistributions of source code must retain the above copyright notice, this #
# list of conditions and the following disclaimer. #
# #
# 2. Redistributions in binary form must reproduce the above copyright notice, #
# this list of conditions and the following disclaimer in the documentation #
# and/or other materials provided with the distribution. #
# #
# 3. Neither the name of the copyright holder nor the names of its contributors #
# may be used to endorse or promote products derived from this software without #
# specific prior written permission. #
# #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
# #
###################################################################################
import numpy
from collections import namedtuple
from beat.backend.python.database import View
# ----------------------------------------------------------
class LargeView(View):
    """View whose entries each carry an array of 1000 random integers.

    The global numpy random generator is seeded with 0 on construction.
    """

    def __init__(self):
        super(LargeView, self).__init__()
        numpy.random.seed(0)  # So it is kept reproducible

    def index(self, root_folder, parameters):
        """Returns 1000 ``Entry`` tuples, each with one ``out`` field

        Every ``out`` is a 1000-element array drawn with
        ``numpy.random.randint(100, ...)`` and converted with
        ``numpy.int32``. ``root_folder`` and ``parameters`` are not used.
        """
        Entry = namedtuple("Entry", ["out"])
        return [
            Entry(numpy.int32(numpy.random.randint(100, size=(1000,))))
            for _ in range(1000)
        ]

    def get(self, output, index):
        """Returns ``{"value": ...}`` for output ``out``, ``None`` otherwise

        The entry is read from ``self.objs``, which is not set in this class
        (presumably provided by the parent ``View`` -- to be confirmed).
        """
        entry = self.objs[index]
        if output != "out":
            return None
        return {"value": entry.out}
# ----------------------------------------------------------
class SmallView(View):
    """View whose entries each carry a single random integer.

    The global numpy random generator is seeded with 0 on construction.
    """

    def __init__(self):
        super(SmallView, self).__init__()
        numpy.random.seed(0)  # So it is kept reproducible

    def index(self, root_folder, parameters):
        """Returns 1000 ``Entry`` tuples, each with one ``out`` field

        Every ``out`` is one value drawn with
        ``numpy.random.randint(0, 100)`` and converted with ``numpy.int32``.
        ``root_folder`` and ``parameters`` are not used.
        """
        Entry = namedtuple("Entry", ["out"])
        return [
            Entry(numpy.int32(numpy.random.randint(0, 100))) for _ in range(1000)
        ]

    def get(self, output, index):
        """Returns ``{"value": ...}`` for output ``out``, ``None`` otherwise

        The entry is read from ``self.objs``, which is not set in this class
        (presumably provided by the parent ``View`` -- to be confirmed).
        """
        entry = self.objs[index]
        if output != "out":
            return None
        return {"value": entry.out}
{
"root_folder": "/tmp/foo/bar",
"protocols": [
{
"name": "protocol",
"template": "protocol/1",
"views": {
"set": {
"view": "View"
},
"set2": {
"view": "View2"
}
}
},
{
"name": "protocol2",
"template": "protocol2/1",
"views": {
"set": {
"view": "LargeView"
},
"set2": {
"view": "View2"
}
}
}
],
"schema_version": 2
}
class View:
    """Minimal view stub: every hook unconditionally returns ``True``."""

    def setup(self, root_folder, outputs, parameters,
              force_start_index=None, force_end_index=None):
        """Initializes the database (no-op here, all arguments are ignored)"""
        return True

    def done(self):
        """Should return ``True`` when data is finished; always ``True`` here"""
        return True

    def next(self):
        """Loads the next data block on ``outputs`` (no-op, returns ``True``)"""
        return True