Skip to content
Snippets Groups Projects
Commit 57bd3218 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Merge branch 'add-gpu-queue' into 'master'

Added a GPU queue that defaults to short_gpu

See merge request !41
parents e34ce246 608a0eff
Branches
Tags
1 merge request !41: Added a GPU queue that defaults to short_gpu
Pipeline #44930 failed
......@@ -2,5 +2,5 @@ from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
cluster = SGEMultipleQueuesCluster(min_jobs=20)
cluster = SGEMultipleQueuesCluster(min_jobs=1)
dask_client = Client(cluster)
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
from bob.pipelines.distributed.sge_queues import QUEUE_LIGHT
from bob.pipelines.distributed.sge_queues import QUEUE_GPU
cluster = SGEMultipleQueuesCluster(min_jobs=20, sge_job_spec=QUEUE_LIGHT)
cluster = SGEMultipleQueuesCluster(min_jobs=1, sge_job_spec=QUEUE_GPU)
dask_client = Client(cluster)
......@@ -109,7 +109,7 @@ def get_max_jobs(queue_dict):
class SGEMultipleQueuesCluster(JobQueueCluster):
"""Launch Dask jobs in the SGE cluster allowing the request of multiple
queus.
queues.
Parameters
----------
......@@ -127,7 +127,7 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
sge_job_spec: dict
Dictionary containing a minimum specification for the qsub command.
It cosists of:
It consists of:
queue: SGE queue
memory: Memory requirement in GB (e.g. 4GB)
......@@ -358,7 +358,7 @@ class AdaptiveMultipleQueue(Adaptive):
This custom implementation extends the `Adaptive.recommendations` by looking
at the `distributed.scheduler.TaskState.resource_restrictions`.
The heristics is:
The heuristics is:
.. note ::
If a certain task has the status `no-worker` and it has resource_restrictions, the scheduler should
......@@ -446,7 +446,7 @@ class SchedulerResourceRestriction(Scheduler):
def __init__(self, *args, **kwargs):
super(SchedulerResourceRestriction, self).__init__(
idle_timeout=300
idle_timeout=3600
if rc.get("bob.pipelines.sge.idle_timeout") is None
else rc.get("bob.pipelines.sge.idle_timeout"),
allowed_failures=100
......
......@@ -50,14 +50,14 @@ default
"""
QUEUE_LIGHT = {
QUEUE_GPU = {
"default": {
"queue": "q_1day",
"memory": "4GB",
"queue": "q_short_gpu",
"memory": "30GB",
"io_big": False,
"resource_spec": "",
"max_jobs": 96,
"resources": "",
"max_jobs": 45,
"resources": {"q_short_gpu": 1},
},
"q_1day": {
"queue": "q_1day",
......@@ -98,6 +98,5 @@ QUEUE_LIGHT = {
},
}
"""
This queue setup has a light arrangement.
For CPU jobs, it prioritizes all.q and not io_big
This queue setup uses the q_short_gpu queue of the SGE.
"""
......@@ -44,9 +44,9 @@ setup(
# entry_points defines which scripts will be inside the 'bin' directory
entry_points = {
'dask.client': [
'local-parallel = bob.pipelines.config.distributed.local_parallel',
'sge = bob.pipelines.config.distributed.sge_default',
'sge-light = bob.pipelines.config.distributed.sge_light',
'local-parallel = bob.pipelines.config.distributed.local_parallel:dask_client',
'sge = bob.pipelines.config.distributed.sge_default:dask_client',
'sge-gpu = bob.pipelines.config.distributed.sge_gpu:dask_client',
],
},
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment