Commit 183dc834 authored by Samuel GAIST's avatar Samuel GAIST

[database] Implement v2

parent 99615244
......@@ -102,6 +102,15 @@ class Database(BackendDatabase):
def __init__(self, prefix, data, dataformat_cache=None):
    """Build the database by delegating entirely to the parent initializer.

    All three arguments are forwarded unchanged, in the same order; this
    override adds no behavior of its own.
    """
    super().__init__(prefix, data, dataformat_cache)
def _validate_view(self, view_name):
if view_name.find(".") != -1 or view_name.find(os.sep) != -1:
self.errors.append(
"dataset views are required to sit inside the "
"database root folder, but `%s' is either in a "
"subdirectory or points to a python module, what is "
"unsupported by this version" % (view_name)
)
def _load(self, data, dataformat_cache):
"""Loads the database"""
......@@ -150,7 +159,7 @@ class Database(BackendDatabase):
self._validate_semantics(dataformat_cache)
def _validate_semantics(self, dataformat_cache):
"""Validates all sematical aspects of the database"""
"""Validates all semantical aspects of the database"""
# all protocol names must be unique
protocol_names = [k["name"] for k in self.data["protocols"]]
......@@ -161,7 +170,7 @@ class Database(BackendDatabase):
# all set names within a protocol must be unique
for protocol in self.data["protocols"]:
set_names = [k["name"] for k in protocol["sets"]]
set_names = self.set_names(protocol["name"])
if len(set_names) != len(set(set_names)):
self.errors.append(
"found different sets with the same name at protocol "
......@@ -169,9 +178,9 @@ class Database(BackendDatabase):
)
# all outputs must have valid data types
for _set in protocol["sets"]:
for _, set_ in self.sets(protocol["name"]).items():
for key, value in _set["outputs"].items():
for key, value in set_["outputs"].items():
if value in self.dataformats:
continue
......@@ -191,17 +200,16 @@ class Database(BackendDatabase):
% (
value,
key,
_set["name"],
set_["name"],
protocol["name"],
"\n".join(dataformat.errors),
)
)
# all view names must be relative to the database root path
if _set["view"].find(".") != -1 or _set["view"].find(os.sep) != -1:
self.errors.append(
"dataset views are required to sit inside the "
"database root folder, but `%s' is either in a "
"subdirectory or points to a python module, what is "
"unsupported by this version" % (_set["view"],)
)
if self.schema_version == 1:
self._validate_view(set_["view"])
if self.schema_version != 1:
for view in protocol["views"].keys():
self._validate_view(view)
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Database descriptor v2",
"description": "This schema defines the properties of a version 2 BEAT database",
"type": "object",
"properties": {
"root_folder": {
"type": "string",
"pattern": "^((file://)?(/[^/]+)+|nfs://[a-z0-9._-]+:(/[^/]+)+)$"
},
"protocols": {
"type": "array",
"minItems": 1,
"uniqueItems": true,
"items": { "$ref": "#/definitions/protocol" }
},
"description": { "$ref": "../common/1.json#/definitions/description" },
"schema_version": { "const": 2 }
},
"required": [
"root_folder",
"protocols",
"schema_version"
],
"additionalProperties": false,
"definitions": {
"template_identifier": {
"type": "string",
"pattern": "^[a-zA-Z0-9_-]+/[0-9]+$"
},
"protocol": {
"type": "object",
"properties": {
"name": { "$ref": "#/definitions/protocol_name" },
"template": { "$ref": "#/definitions/template_identifier" },
"views": { "$ref": "#/definitions/views" }
},
"required": ["name", "views", "template"],
"additionalProperties": false
},
"protocol_name": {
"type": "string",
"pattern": "^[a-zA-Z0-9_][\\.a-zA-Z0-9_-]*$"
},
"view_name": {
"type": "string",
"pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$"
},
"views": {
"type": "object",
"minProperties": 1,
"uniqueItems": true,
"patternProperties": {
"^[a-zA-Z_][a-zA-Z0-9_]*$": { "$ref": "#/definitions/view" }
}
},
"view": {
"type": "object",
"properties": {
"view": { "$ref": "#/definitions/view_name" },
"parameters": { "$ref": "#/definitions/parameters" }
},
"additionalProperties": false
},
"parameters": {
"type": "object",
"patternProperties": {
"^[a-zA-Z_][a-zA-Z0-9_-]*$": {
"$ref": "#/definitions/parameter_value"
}
}
},
"parameter_value": {
"oneOf": [
{
"type": "string"
},
{
"type": "number"
},
{
"type": "boolean"
}
]
}
}
}
{
"root_folder": "/tmp/path/not/set",
"protocols": [
{
"name": "large",
"template": "large/1",
"views": {
"data": {
"view": "LargeView",
"parameters": {}
}
}
},
{
"name": "small",
"template": "small/1",
"views": {
"data": {
"view": "SmallView",
"parameters": {}
}
}
}
],
"schema_version": 2,
"description": ""
}
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###################################################################################
# #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# Redistribution and use in source and binary forms, with or without #
# modification, are permitted provided that the following conditions are met: #
# #
# 1. Redistributions of source code must retain the above copyright notice, this #
# list of conditions and the following disclaimer. #
# #
# 2. Redistributions in binary form must reproduce the above copyright notice, #
# this list of conditions and the following disclaimer in the documentation #
# and/or other materials provided with the distribution. #
# #
# 3. Neither the name of the copyright holder nor the names of its contributors #
# may be used to endorse or promote products derived from this software without #
# specific prior written permission. #
# #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
# #
###################################################################################
import numpy
from collections import namedtuple
from beat.backend.python.database import View
# ----------------------------------------------------------
class LargeView(View):
    """Test view whose single output ``out`` is an array of random integers."""

    def __init__(self):
        super(LargeView, self).__init__()
        # Fixed seed so the generated data is reproducible.
        numpy.random.seed(0)

    def index(self, root_folder, parameters):
        """Return 1000 entries, each holding 1000 random int32 values below 100.

        ``root_folder`` and ``parameters`` are accepted but not used.
        """
        Entry = namedtuple("Entry", ["out"])
        return [
            Entry(numpy.int32(numpy.random.randint(100, size=(1000,))))
            for _ in range(1000)
        ]

    def get(self, output, index):
        """Return ``{"value": ...}`` for the ``out`` output, else ``None``."""
        entry = self.objs[index]
        return {"value": entry.out} if output == "out" else None
# ----------------------------------------------------------
class SmallView(View):
    """Test view whose single output ``out`` is one random integer."""

    def __init__(self):
        super(SmallView, self).__init__()
        # Fixed seed so the generated data is reproducible.
        numpy.random.seed(0)

    def index(self, root_folder, parameters):
        """Return 1000 entries, each holding one random int32 in ``[0, 100)``.

        ``root_folder`` and ``parameters`` are accepted but not used.
        """
        Entry = namedtuple("Entry", ["out"])
        return [
            Entry(numpy.int32(numpy.random.randint(0, 100)))
            for _ in range(1000)
        ]

    def get(self, output, index):
        """Return ``{"value": ...}`` for the ``out`` output, else ``None``."""
        entry = self.objs[index]
        return {"value": entry.out} if output == "out" else None
{
"root_folder": "/tmp/foo/bar",
"protocols": [
{
"name": "protocol",
"template": "protocol/1",
"views": {
"set": {
"view": "View"
},
"set2": {
"view": "View2"
}
}
},
{
"name": "protocol2",
"template": "protocol2/1",
"views": {
"set": {
"view": "LargeView"
},
"set2": {
"view": "View2"
}
}
}
],
"schema_version": 2
}
class View:
    """Stub view: every method accepts its arguments and returns ``True``."""

    def setup(
        self,
        root_folder,
        outputs,
        parameters,
        force_start_index=None,
        force_end_index=None,
    ):
        """Accept the setup arguments, ignore them and report success."""
        return True

    def done(self):
        """Report that the data is finished; this stub always says so."""
        return True

    def next(self):
        """Stand in for loading the next data block; always succeeds."""
        return True
{
"schema_version": 1,
"sets": [
{
"name": "data",
"outputs": {
"out": "user/empty_1d_array_of_integers/1"
}
}
]
}
{
"schema_version": 1,
"sets": [
{
"name": "set",
"outputs": {
"out": "user/single_integer/1"
}
},
{
"name": "set2",
"outputs": {
"out": "user/single_integer/1"
}
}
]
}
{
"schema_version": 1,
"sets": [
{
"name": "set",
"outputs": {
"out": "user/single_integer/1"
}
},
{
"name": "set2",
"outputs": {
"out": "user/single_integer/1"
}
}
]
}
{
"schema_version": 1,
"sets": [
{
"name": "data",
"outputs": {
"out": "user/single_integer/1"
}
}
]
}
......@@ -41,15 +41,21 @@ from . import prefix, tmp_prefix
from .utils import cleanup
@nose.tools.with_setup(teardown=cleanup)
def test_export():
    """Yield one ``export`` check per database family for versions 1 and 2."""
    families = ("integers_db", "simple", "large")
    for version in (1, 2):
        for family in families:
            yield export, f"{family}/{version}"
@nose.tools.with_setup(teardown=cleanup)
def export(db_name):
name = "integers_db/1"
obj = Database(prefix, name)
obj = Database(prefix, db_name)
nose.tools.assert_true(obj.valid, "\n * %s" % "\n * ".join(obj.errors))
obj.export(tmp_prefix)
# load from tmp_prefix and validates
exported = Database(tmp_prefix, name)
exported = Database(tmp_prefix, db_name)
nose.tools.assert_true(exported.valid, "\n * %s" % "\n * ".join(exported.errors))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment