Commit ff629f3e authored by André Anjos

Merge branch '1.6.x' into 'master'

Merge development branch 1.6.x

Closes #47, #54, #55, and #51

See merge request !32
parents a8871304 2777b1eb
Pipeline #23581 failed with stages in 51 minutes and 31 seconds
@@ -21,3 +21,10 @@ opsnr.stt
.coverage
.DS_Store
html/
record.txt
_ci/
miniconda.sh
miniconda/
miniconda.cached/
conda/recipe_append.yaml
conda-bld/
# This build file uses template features from YAML so it is generic enough for
# any Bob project. Don't modify it unless you know what you're doing.
# Definition of global variables (all stages)
variables:
CONDA_ROOT: "${CI_PROJECT_DIR}/miniconda"
# Definition of our build pipeline order
stages:
- build
- docker
- deploy
- pypi
variables:
PREFIX: /opt/beat.env.web/usr
build:
# Build targets
.build_template: &build_job
stage: build
before_script:
- ${PREFIX}/bin/python --version
- docker info
- mkdir _ci
- curl --silent "https://gitlab.idiap.ch/bob/bob.admin/raw/master/gitlab/install.sh" > _ci/install.sh
- chmod 755 _ci/install.sh
- ./_ci/install.sh _ci master #installs ci support scripts
- ./_ci/before_build.sh
script:
- ./_ci/build.sh
after_script:
- ./_ci/after_build.sh
cache: &build_caches
paths:
- miniconda.sh
- ${CONDA_ROOT}/pkgs/*.tar.bz2
- ${CONDA_ROOT}/pkgs/urls.txt
.build_linux_template: &linux_build_job
<<: *build_job
tags:
- docker
image: continuumio/conda-concourse-ci
artifacts:
expire_in: 1 week
paths:
- _ci/
- ${CONDA_ROOT}/conda-bld/linux-64/*.tar.bz2
cache:
<<: *build_caches
key: "linux-cache"
.build_macosx_template: &macosx_build_job
<<: *build_job
tags:
- macosx
artifacts:
expire_in: 1 week
paths:
- _ci/
- ${CONDA_ROOT}/conda-bld/osx-64/*.tar.bz2
cache:
<<: *build_caches
key: "macosx-cache"
# Docker host based testing (must be run inside dind or docker-enabled host)
.docker_test_linux_template: &linux_docker_job
stage: docker
before_script:
# safe-keep artifacts, as before_build.sh will erase them...
- mv ${CONDA_ROOT}/conda-bld .
- ./_ci/install.sh _ci master #updates ci support scripts
- ./_ci/before_build.sh
- mv conda-bld ${CONDA_ROOT}
- ./scripts/before_test.sh
script:
- export BEAT_DOCKER_TESTS=true
- BOB_TEST_ONLY=true ./_ci/build.sh
after_script:
- ./_ci/after_build.sh
build_linux_27:
<<: *linux_build_job
variables:
PYTHON_VERSION: "2.7"
build_linux_36:
<<: *linux_build_job
variables:
PYTHON_VERSION: "3.6"
BUILD_EGG: "true"
artifacts:
expire_in: 1 week
paths:
- _ci/
- dist/*.zip
- sphinx
- ${CONDA_ROOT}/conda-bld/linux-64/*.tar.bz2
build_macosx_27:
<<: *macosx_build_job
variables:
PYTHON_VERSION: "2.7"
build_macosx_36:
<<: *macosx_build_job
variables:
PYTHON_VERSION: "3.6"
# Docker host based testing
docker_linux_27:
<<: *linux_docker_job
variables:
PYTHON_VERSION: "2.7"
dependencies:
- build_linux_27
tags:
- docker-build
docker_linux_36:
<<: *linux_docker_job
variables:
PYTHON_VERSION: "3.6"
dependencies:
- build_linux_36
tags:
- docker-build
# Deploy targets
.deploy_template: &deploy_job
stage: deploy
before_script:
- ./_ci/install.sh _ci master #updates ci support scripts
script:
- ./_ci/deploy.sh
dependencies:
- build_linux_27
- build_linux_36
- build_macosx_27
- build_macosx_36
tags:
- deployer
deploy_beta:
<<: *deploy_job
environment: beta
only:
- 1.6.x
deploy_stable:
<<: *deploy_job
environment: stable
only:
- /^v\d+\.\d+\.\d+([abc]\d*)?$/ # PEP-440 compliant version (tags)
except:
- branches
pypi:
stage: pypi
environment: pypi
only:
- /^v\d+\.\d+\.\d+([abc]\d*)?$/ # PEP-440 compliant version (tags)
except:
- branches
before_script:
- ./_ci/install.sh _ci master #updates ci support scripts
script:
- git clean -ffdx
- ${PREFIX}/bin/python bootstrap-buildout.py
- ./bin/buildout
- ./bin/python ${PREFIX}/bin/coverage run --source=${CI_PROJECT_NAME} ./bin/nosetests -sv ${CI_PROJECT_NAME}
- ./bin/python ${PREFIX}/bin/coverage report
- ./bin/python ${PREFIX}/bin/sphinx-apidoc --separate -d 2 --output=doc/api ${CI_PROJECT_NAMESPACE}
- ./bin/python ${PREFIX}/bin/sphinx-build doc sphinx
- ./_ci/pypi.sh
dependencies:
- build_linux_36
tags:
- docker-build
- deployer
include LICENSE.AGPL README.rst buildout.cfg bootstrap-buildout.py
include LICENSE.AGPL README.rst version.txt requirements.txt
include buildout.cfg develop.cfg
recursive-include scripts *.sh
recursive-include doc conf.py *.rst *.png *.svg *.ico *.odg *.pdf *.dot
recursive-include beat/core/schema *.json
recursive-include beat/core/prototypes *.json *.py
recursive-include beat/core/test/prefix *.json *.py
recursive-include beat/core/test/prefix *.json *.py *.r *.m *.rst *.h *.cpp
@@ -20,159 +20,45 @@
.. You should have received a copy of the GNU Affero Public License along ..
.. with the BEAT platform. If not, see http://www.gnu.org/licenses/. ..
.. image:: https://img.shields.io/badge/docs-stable-yellow.svg
:target: https://www.idiap.ch/software/beat/docs/beat/beat.core/stable/index.html
.. image:: https://img.shields.io/badge/docs-latest-orange.svg
:target: https://www.idiap.ch/software/beat/docs/beat/beat.core/master/index.html
.. image:: https://gitlab.idiap.ch/beat/beat.core/badges/master/build.svg
:target: https://gitlab.idiap.ch/beat/beat.core/commits/master
.. image:: https://gitlab.idiap.ch/beat/beat.core/badges/master/coverage.svg
:target: https://gitlab.idiap.ch/beat/beat.core/commits/master
.. image:: https://img.shields.io/badge/gitlab-project-0000c0.svg
:target: https://gitlab.idiap.ch/beat/beat.core
.. image:: https://img.shields.io/pypi/v/beat.core.svg
:target: https://pypi.python.org/pypi/beat.core
============================================
Biometrics Evaluation and Testing Platform
============================================
This package contains the source code for the core components of the BEAT
platform.
==========================
Core Components for BEAT
==========================
This package is part of BEAT_, an open-source evaluation platform for data science
algorithms and workflows. It contains the source code for its core components.
Installation
------------
Really easy, with ``zc.buildout``::
$ python bootstrap-buildout.py
$ ./bin/buildout
These two commands should download and install all missing dependencies and
give you a fully operational test and development environment.
.. note::
The python shell used in the first line of the previous command set
determines the python interpreter that will be used for all scripts developed
inside this package.
If you are on the Idiap filesystem, you may use
``/idiap/project/beat/beat.env.deploy/usr/bin/python`` to bootstrap this
package instead. It contains the same setup deployed at the final BEAT
machinery.
Docker
======
This package depends on Docker_ and uses it to run user algorithms in a
container with the required software stack. You must install the Docker_ engine
and make sure the user running tests has access to it.
In particular, this package controls memory and CPU utilisation of the
containers it launches. You must make sure to enable those functionalities on
your installation.
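As a quick sanity check (a sketch; the exact warning text varies across
distributions), ``docker info`` reports when the kernel lacks memory or swap
limit support::

  $ docker info 2>&1 | grep -i 'limit support'

If this prints warnings such as ``No swap limit support``, enable the
corresponding kernel cgroup options before running the test suite.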
Docker Setup
============
Complete BEAT's `installation`_ instructions. Then, to install this package,
run::
Make sure you have the ``docker`` command available on your system. For certain
operating systems, it is necessary to install ``docker`` via an external
virtual machine (a.k.a. the *docker machine*). Follow the instructions at `the
docker website <https://docs.docker.com/engine/installation/>`_ before trying
to execute algorithms or experiments.
$ conda install beat.core
We use specific docker images to run user algorithms. Download the following
base images before you try to run tests or experiments on your computer::
$ docker pull docker.idiap.ch/beat/beat.env.system.python:1.1.2
$ docker pull docker.idiap.ch/beat/beat.env.db.examples:1.1.1
$ docker pull docker.idiap.ch/beat/beat.env.client:1.2.0
$ docker pull docker.idiap.ch/beat/beat.env.cxx:1.0.2
Optionally, also download the following images to be able to re-run experiments
downloaded from the BEAT platform (not required for unit testing)::
$ docker pull docker.idiap.ch/beat/beat.env.python:0.0.4
$ docker pull docker.idiap.ch/beat/beat.env.python:1.0.0
$ docker pull docker.idiap.ch/beat/beat.env.db:1.2.2
Documentation
-------------
To build the documentation, just do::
$ ./bin/sphinx-apidoc --separate -d 2 --output=doc/api beat beat/core/test beat/core/scripts
$ ./bin/sphinx-build doc sphinx
Testing
Contact
-------
After installation, it is possible to run our suite of unit tests. To do so,
use ``nose``::
$ ./bin/nosetests -sv
.. note::
Some of the tests for our command-line toolkit require a running BEAT
platform web-server, with a compatible ``beat.core`` installed (preferably
the same). By default, these tests will be skipped. If you want to run
them, you must setup a development web server and set the environment
variable ``BEAT_CORE_TEST_PLATFORM`` to point to that address. For example::
$ export BEAT_CORE_TEST_PLATFORM="http://example.com/platform/"
$ ./bin/nosetests -sv
It is **not** advisable to run tests against a production web server.
If you want to skip slow tests (at least those that pull data from our servers
or execute lengthy operations), just do::
$ ./bin/nosetests -sv -a '!slow'
To measure the test coverage, do the following::
$ ./bin/nosetests -sv --with-coverage --cover-package=beat.core
To produce an HTML test coverage report in the directory ``./htmlcov``, do the
following::
$ ./bin/nosetests -sv --with-coverage --cover-package=beat.core --cover-html --cover-html-dir=htmlcov
Our documentation is also interspersed with doctest units. You can run them
using Sphinx::
$ ./bin/sphinx-build -b doctest doc sphinx
Development
-----------
Indentation
===========
You can enforce PEP8_ compliance using the application ``autopep8``. For
example, to enforce compliance on a single file and edit it in place, do::
$ ./bin/autopep8 --indent-size=2 --in-place beat/core/utils.py
We normally use 2-space indentation. If needed, you can easily change the
indentation to 4 spaces like this::
$ ./bin/autopep8 --indent-size=4 --in-place beat/core/utils.py
Profiling
=========
In order to profile the test code, try the following::
$ ./bin/python -mcProfile -oprof.data ./bin/nosetests -sv ...
This will dump the profiling data at ``prof.data``. You can inspect its
contents in different ways using another command::
$ ./bin/python -mpstats prof.data
This will allow you to dump and print the profiling statistics as you see
fit.
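The same data can also be explored programmatically; a minimal sketch using
the standard :py:mod:`pstats` API (assuming the profile was dumped to
``prof.data`` as above)::

  import pstats

  # load the dump and print the 10 most expensive calls by cumulative time
  stats = pstats.Stats('prof.data')
  stats.sort_stats('cumulative').print_stats(10)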
For questions or reporting issues to this software package, contact our
development `mailing list`_.
.. References go here
.. _pep8: https://www.python.org/dev/peps/pep-0008/
.. _docker: https://www.docker.com/
.. Place your references here:
.. _beat: https://www.idiap.ch/software/beat
.. _installation: https://www.idiap.ch/software/beat/install
.. _mailing list: https://www.idiap.ch/software/beat/discuss
@@ -25,5 +25,6 @@
# #
###############################################################################
#see http://peak.telecommunity.com/DevCenter/setuptools#namespace-packages
__import__('pkg_resources').declare_namespace(__name__)
# see https://docs.python.org/3/library/pkgutil.html
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
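For context, a pkgutil-style namespace ``__init__.py`` such as the one above
lets several separately-installed distributions contribute subpackages to the
same top-level ``beat`` package; a minimal sketch (hypothetical installation
layout)::

  # both beat.core and beat.backend.python ship a beat/__init__.py
  # containing exactly these two lines:
  from pkgutil import extend_path
  __path__ = extend_path(__path__, __name__)  # merge all 'beat' dirs on sys.path

  # after which subpackages from either distribution import normally:
  import beat.core
  import beat.backend.python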
@@ -25,15 +25,21 @@
# #
###############################################################################
"""
=========
algorithm
=========
"""Validation for algorithms"""
Validation for algorithms
Forward importing from :py:mod:`beat.backend.python.algorithm`
:py:class:`beat.backend.python.algorithm.Storage`
:py:class:`beat.backend.python.algorithm.Runner`
"""
import os
import sys
import six
import numpy
import simplejson
from . import dataformat
from . import library
@@ -46,39 +52,38 @@ from beat.backend.python.algorithm import Runner
from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
class Algorithm(BackendAlgorithm):
"""Algorithms represent runnable components within the platform.
This class can only parse the meta-parameters of the algorithm (i.e., input
and output declaration, grouping, synchronization details, parameters and
splittability). The actual algorithm is not directly treated by this class -
it can, however, provide you with a loader for actually running the
algorithmic code (see :py:meth:`Algorithm.runner`).
splittability). The actual algorithm is not directly treated by this class.
It can, however, provide you with a loader for actually running the
algorithmic code (see :py:meth:`.runner`).
Parameters:
prefix (str): Establishes the prefix of your installation.
data (object, optional): The piece of data representing the algorithm. It
must validate against the schema defined for algorithms. If a string is
passed, it is supposed to be a valid path to an algorithm in the
designated prefix area. If a tuple is passed (or a list), then we
consider that the first element represents the algorithm declaration,
while the second, the code for the algorithm (either in its source format
or as a binary blob). If ``None`` is passed, loads our default prototype
for algorithms (source code will be in Python).
data (:py:class:`object`, Optional): The piece of data representing the
algorithm. It must validate against the schema defined for algorithms.
If a string is passed, it is supposed to be a valid path to an
algorithm in the designated prefix area. If a tuple is passed (or a
list), then we consider that the first element represents the algorithm
declaration, while the second, the code for the algorithm (either in
its source format or as a binary blob). If ``None`` is passed, loads
our default prototype for algorithms (source code will be in Python).
dataformat_cache (dict, optional): A dictionary mapping dataformat names to
loaded dataformats. This parameter is optional and, if passed, may
greatly speed-up algorithm loading times as dataformats that are already
loaded may be re-used.
dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
dataformat names to loaded dataformats. This parameter is optional and,
if passed, may greatly speed-up algorithm loading times as dataformats
that are already loaded may be re-used.
library_cache (dict, optional): A dictionary mapping library names to
loaded libraries. This parameter is optional and, if passed, may greatly
speed-up library loading times as libraries that are already loaded may
be re-used.
library_cache (:py:class:`dict`, Optional): A dictionary mapping library
names to loaded libraries. This parameter is optional and, if passed,
may greatly speed-up library loading times as libraries that are
already loaded may be re-used.
Attributes:
@@ -93,33 +98,34 @@ class Algorithm(BackendAlgorithm):
storage (object): A simple object that provides information about file
paths for this algorithm
dataformats (dict): A dictionary containing all pre-loaded dataformats used
by this algorithm. Data format objects will be of type
dataformats (dict): A dictionary containing all pre-loaded dataformats
used by this algorithm. Data format objects will be of type
:py:class:`beat.core.dataformat.DataFormat`.
libraries (dict): A mapping object defining other libraries this algorithm
needs to load so it can work properly.
libraries (dict): A mapping object defining other libraries this
algorithm needs to load so it can work properly.
uses (dict): A mapping object defining the required library import name
(keys) and the full-names (values).
parameters (dict): A dictionary containing all pre-defined parameters that
this algorithm accepts.
parameters (dict): A dictionary containing all pre-defined parameters
that this algorithm accepts.
splittable (bool): A boolean value that indicates if this algorithm is
automatically parallelizeable by our backend.
input_map (dict): A dictionary where the key is the input name and the
value, its type. All input names (potentially from different groups) are
comprised in this dictionary.
value, its type. All input names (potentially from different groups)
are comprised in this dictionary.
output_map (dict): A dictionary where the key is the output name and the
value, its type. All output names (potentially from different groups) are
comprised in this dictionary.
value, its type. All output names (potentially from different groups)
are comprised in this dictionary.
results (dict): If this algorithm is actually an analyzer (i.e., there are
no formal outputs, but results that must be saved by the platform), then
this dictionary contains the names and data types of those elements.
results (dict): If this algorithm is actually an analyzer (i.e., there
are no formal outputs, but results that must be saved by the platform),
then this dictionary contains the names and data types of those
elements.
groups (dict): A list containing dictionaries with inputs and outputs
belonging to the same synchronization group.
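A minimal usage sketch (the prefix path and algorithm name below are
hypothetical; assumes a valid algorithm declaration exists under that
prefix)::

  from beat.core.algorithm import Algorithm

  # load the declaration from the designated prefix area and validate it
  algorithm = Algorithm('/path/to/prefix', 'user/my_algorithm/1')
  assert not algorithm.errors, algorithm.errors

  # obtain a loader for actually running the algorithmic code
  runner = algorithm.runner()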
@@ -148,21 +154,21 @@ class Algorithm(BackendAlgorithm):
self._name = None
self.storage = None
self.dataformats = {} # preloaded dataformats
self.libraries = {} # preloaded libraries
self.dataformats = {} # preloaded dataformats
self.libraries = {} # preloaded libraries
code = None
if data is None: #loads prototype and validates it
if data is None: # loads prototype and validates it
data = None
code = None
elif isinstance(data, (tuple, list)): #user has passed individual info
elif isinstance(data, (tuple, list)): # user has passed individual info
data, code = data #break down into two components
data, code = data # break down into two components
if isinstance(data, six.string_types): #user has passed a file pointer
if isinstance(data, six.string_types): # user has passed a file pointer
self._name = data
self.storage = Storage(self.prefix, self._name)
@@ -170,21 +176,21 @@
self.errors.append('Algorithm declaration file not found: %s' % data)
return
data = self.storage.json.path #loads data from JSON declaration
data = self.storage.json.path # loads data from JSON declaration
# At this point, `data' can be a dictionary or ``None``
if data is None: # loads the default declaration for an algorithm
if data is None: # loads the default declaration for an algorithm
self.data, self.errors = prototypes.load('algorithm')
assert not self.errors, "\n * %s" % "\n *".join(self.errors)
else: # just assign it
else: # just assign it
# this runs basic validation, including JSON loading if required
self.data, self.errors = schema.validate('algorithm', data)
if self.errors: return #don't proceed with the rest of validation
if self.errors: return # don't proceed with the rest of validation
if self.storage is not None: #loading from the disk, check code
if self.storage is not None: # loading from the disk, check code
if not self.storage.code.exists():
if self.data['language'] != 'cxx':
self.errors.append('Algorithm code not found: %s' % \
@@ -195,15 +201,15 @@
# At this point, `code' can be a string (or a binary blob) or ``None``
if code is None: # loads the default code for an algorithm
if code is None: # loads the default code for an algorithm
self.code = prototypes.binary_load('algorithm.py')
self.data['language'] = 'python'
else: # just assign it - notice that in this case, no language is set
else: # just assign it - notice that in this case, no language is set
self.code = code
if self.errors: return #don't proceed with the rest of validation
if self.errors: return # don't proceed with the rest of validation
# if no errors so far, make sense out of the declaration data
@@ -255,11 +261,11 @@
for name, input in group['inputs'].items():
if input['type'] in self.dataformats: continue
if dataformat_cache and input['type'] in dataformat_cache: #reuse
if dataformat_cache and input['type'] in dataformat_cache: # reuse
thisformat = dataformat_cache[input['type']]