Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
beat.web
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
beat
beat.web
Commits
706bdf51
Commit
706bdf51
authored
7 years ago
by
Philip ABBET
Browse files
Options
Downloads
Patches
Plain Diff
[experiments, backend] Move all scheduling-related code into helper functions of the backend module
parent
6ef27ec3
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
beat/web/backend/helpers.py
+103
-0
103 additions, 0 deletions
beat/web/backend/helpers.py
beat/web/experiments/models/block.py
+0
-55
0 additions, 55 deletions
beat/web/experiments/models/block.py
beat/web/experiments/models/experiment.py
+3
-27
3 additions, 27 deletions
beat/web/experiments/models/experiment.py
with
106 additions
and
82 deletions
beat/web/backend/helpers.py
0 → 100755
+
103
−
0
View file @
706bdf51
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###############################################################################
# #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# This file is part of the beat.web module of the BEAT platform. #
# #
# Commercial License Usage #
# Licensees holding valid commercial BEAT licenses may use this file in #
# accordance with the terms contained in a written agreement between you #
# and Idiap. For further information contact tto@idiap.ch #
# #
# Alternatively, this file may be used under the terms of the GNU Affero #
# Public License version 3 as published by the Free Software and appearing #
# in the file LICENSE.AGPL included in the packaging of this file. #
# The BEAT platform is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
# or FITNESS FOR A PARTICULAR PURPOSE. #
# #
# You should have received a copy of the GNU Affero Public License along #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/. #
# #
###############################################################################
from
django.db
import
transaction
from
..experiments.models
import
Experiment
from
..experiments.models
import
Block
from
.models
import
Job
@transaction.atomic
def schedule_experiment(experiment):
    '''Creates the backend jobs that make an experiment eligible to run

    One :py:class:`.models.Job` is saved for every block of the experiment,
    after which the experiment status is set to ``Experiment.SCHEDULED``.
    Nothing is done if the experiment is not in the ``Experiment.PENDING``
    state at the time of the call.

    This function is meant to be called from the web server; noticing the
    newly created jobs and running them is left to the scheduler.


    Parameters:

      experiment (:py:class:`..experiments.models.Experiment`): The
        experiment to schedule. Only its primary key is used: the object is
        re-read from the database with ``select_for_update()`` and all
        changes are saved through that re-read copy, so the instance passed
        in is not modified.


    Raises:

      RuntimeError: If any block of the experiment has no queue or no
        environment assigned. All blocks are checked before the first job
        is created.

    '''

    # Work on a freshly fetched, row-locked copy of the experiment
    locked = Experiment.objects.select_for_update().get(pk=experiment.pk)

    # Only experiments in the PENDING state may be scheduled
    if locked.status != Experiment.PENDING:
        return

    # First pass: make sure every block still has a queue and an environment
    for checked in locked.blocks.all():

        if checked.queue is None:
            raise RuntimeError(
                (
                    "Block `%s' does not have a queue assigned "
                    "- this normally indicates the originally selected "
                    "queue was deleted since the experiment was first "
                    "configured. Re-configure this experiment and select a new "
                    "default or block-specific queue"
                ) % checked.name
            )

        if checked.environment is None:
            raise RuntimeError(
                (
                    "Block `%s' does not have an environment "
                    "assigned - this normally indicates the originally selected "
                    "environment was deleted since the experiment was first "
                    "configured. Re-configure this experiment and select a new "
                    "default or block-specific environment"
                ) % checked.name
            )

    # Second pass: create one job per block
    for entry in locked.blocks.all():

        # Re-read the block under a row lock before attaching a job to it
        current = Block.objects.select_for_update().get(pk=entry.pk)

        # Candidate parent: a job whose block shares an output with this
        # block and which has no child yet. This is only a candidate - it is
        # looked up without a lock, so it may get a child before we lock it.
        produced = current.outputs.all()
        ancestor = Job.objects.filter(
            block__outputs__in=produced,
            child=None,
        ).first()

        if ancestor is None:
            new_job = Job(block=current)

        else:
            # Lock the candidate; while the locked job turns out to already
            # have a child, move on to that child and lock it instead
            # NOTE(review): `child_` is presumably a None-safe accessor for
            # the reverse `child` relation on Job - confirm in .models
            ancestor = Job.objects.select_for_update().get(pk=ancestor.pk)
            while ancestor.child_ is not None:
                successor = ancestor.child
                ancestor = Job.objects.select_for_update().get(pk=successor.pk)

            new_job = Job(block=current, parent=ancestor)

        new_job.save()

    # Every block now has a job: flag the experiment as scheduled
    locked.status = Experiment.SCHEDULED
    locked.save()
#----------------------------------------------------------
This diff is collapsed.
Click to expand it.
beat/web/experiments/models/block.py
+
0
−
55
View file @
706bdf51
...
@@ -196,61 +196,6 @@ class Block(models.Model):
...
@@ -196,61 +196,6 @@ class Block(models.Model):
results
=
property
(
lambda
self
:
self
.
__return_first__
(
'
results
'
))
results
=
property
(
lambda
self
:
self
.
__return_first__
(
'
results
'
))
def _schedule(self):
    '''Schedules this block for execution at the backend

    To "schedule" means solely creating a :py:class:`..backend.models.Job`
    pointing to this object. This method **should only be called by the
    owning experiment**. It is not part of the Block's public API.

    The method returns without creating a job if the database copy of this
    block reports ``done()``. After the job is saved, ``is_runnable()`` is
    consulted and, when it is true, ``_make_runnable()`` is called on the
    job reachable through ``self.job``.


    Raises:

      RuntimeError: If this block has no queue or no environment assigned

    '''

    # lock self - avoids concurrent update from scheduler/worker subsystem
    # (``self_`` is a freshly fetched copy obtained via select_for_update())
    self_ = Block.objects.select_for_update().get(pk=self.pk)

    # checks we have not, meanwhile, been cancelled
    # (the check is made on the freshly fetched copy, not on ``self``)
    if self_.done():
        return

    # checks queue and environment
    # NOTE(review): these checks (and everything below) read from ``self``,
    # i.e. the instance the caller holds, not from the locked copy ``self_``
    if self.queue is None:
        raise RuntimeError("Block `%s' does not have a queue assigned " \
            "- this normally indicates the originally selected " \
            "queue was deleted since the experiment was first " \
            "configured. Re-configure this experiment and select a new " \
            "default or block-specific queue" % self.name)

    if self.environment is None:
        raise RuntimeError("Block `%s' does not have an environment " \
            "assigned - this normally indicates the originally selected " \
            "environment was deleted since the experiment was first " \
            "configured. Re-configure this experiment and select a new " \
            "default or block-specific environment" % self.name)

    # search for other jobs with similar outputs that have no children yet
    # do this carefully, as other experiments may be scheduled at the same
    # time, invalidating our "parent" choice
    parent = Job.objects.filter(block__outputs__in=self.outputs.all(),
        child=None).first()
    if parent is not None:
        #(candidate only) try to lock it
        while True:
            # re-read the candidate with select_for_update() before use
            parent = Job.objects.select_for_update().get(pk=parent.pk)
            # NOTE(review): `child_` is presumably a None-safe accessor for
            # the reverse `child` relation on Job - confirm in the Job model
            if parent.child_ is not None:
                #was taken meanwhile, retry
                # (the retry continues with the job that took the slot)
                parent = parent.child
                continue
            job = Job(block=self, parent=parent)
            break
    else:
        # no candidate parent found: the job is created without a parent
        job = Job(block=self)

    job.save()

    # checks if the job is immediately runnable - if so, tries to
    # make it runnable (check caches and other)
    # NOTE(review): ``self.job`` is presumably the reverse accessor to the
    # job saved just above - confirm against the Job model definition
    if self.is_runnable():
        self.job._make_runnable()
def
done
(
self
):
def
done
(
self
):
'''
Says whether the block has finished or not
'''
'''
Says whether the block has finished or not
'''
...
...
This diff is collapsed.
Click to expand it.
beat/web/experiments/models/experiment.py
+
3
−
27
View file @
706bdf51
...
@@ -818,35 +818,11 @@ class Experiment(Shareable):
...
@@ -818,35 +818,11 @@ class Experiment(Shareable):
self
.
save
()
self
.
save
()
@transaction.atomic
def
schedule
(
self
):
def
schedule
(
self
):
'''
Schedules this experiment for execution at the backend
'''
Schedules this experiment for execution at the backend
'''
Because the experiment is fully built on ``save()`` (including block
interdependence and cache requirements), to
"
schedule
"
means solely
creating :py:class:`..backend.models.Job`
'
s to address all
algorithm-equipped blocks in the experiment. A ``Job`` is the
reflection of the experiment
'
s block for the backend and makes the
schedule aware of execution units that must be processed. Each ``Job``
is then split on the scheduler process, for as many times as required
by the :py:class:`Block`
'
s ``required_slots`` entry, effectively
creating one :py:class:`..backend.models.JobSplit` per split.
'''
self_
=
Experiment
.
objects
.
select_for_update
().
get
(
pk
=
self
.
pk
)
if
self_
.
status
!=
Experiment
.
PENDING
:
from
...backend.helpers
import
schedule_experiment
return
schedule_experiment
(
self
)
for
block
in
self
.
blocks
.
all
():
block
.
_schedule
()
# notice that the previous call may decide all is done already
# so, we must respect that before setting the SCHEDULED status
self
.
refresh_from_db
()
if
not
self
.
is_done
():
self
.
status
=
Experiment
.
SCHEDULED
self
.
save
()
@transaction.atomic
@transaction.atomic
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment