Skip to content
Snippets Groups Projects
Commit 57bd3218 authored by Tiago de Freitas Pereira
Browse files

Merge branch 'add-gpu-queue' into 'master'

Added a GPU queue that defaults to short_gpu

See merge request !41
parents e34ce246 608a0eff
Branches
Tags
1 merge request: !41 Added a GPU queue that defaults to short_gpu
Pipeline #44930 failed
...@@ -2,5 +2,5 @@ from dask.distributed import Client ...@@ -2,5 +2,5 @@ from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
cluster = SGEMultipleQueuesCluster(min_jobs=20) cluster = SGEMultipleQueuesCluster(min_jobs=1)
dask_client = Client(cluster) dask_client = Client(cluster)
from dask.distributed import Client from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
from bob.pipelines.distributed.sge_queues import QUEUE_LIGHT from bob.pipelines.distributed.sge_queues import QUEUE_GPU
cluster = SGEMultipleQueuesCluster(min_jobs=20, sge_job_spec=QUEUE_LIGHT) cluster = SGEMultipleQueuesCluster(min_jobs=1, sge_job_spec=QUEUE_GPU)
dask_client = Client(cluster) dask_client = Client(cluster)
...@@ -109,7 +109,7 @@ def get_max_jobs(queue_dict): ...@@ -109,7 +109,7 @@ def get_max_jobs(queue_dict):
class SGEMultipleQueuesCluster(JobQueueCluster): class SGEMultipleQueuesCluster(JobQueueCluster):
"""Launch Dask jobs in the SGE cluster allowing the request of multiple """Launch Dask jobs in the SGE cluster allowing the request of multiple
queus. queues.
Parameters Parameters
---------- ----------
...@@ -127,7 +127,7 @@ class SGEMultipleQueuesCluster(JobQueueCluster): ...@@ -127,7 +127,7 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
sge_job_spec: dict sge_job_spec: dict
Dictionary containing a minimum specification for the qsub command. Dictionary containing a minimum specification for the qsub command.
It cosists of: It consists of:
queue: SGE queue queue: SGE queue
memory: Memory requirement in GB (e.g. 4GB) memory: Memory requirement in GB (e.g. 4GB)
...@@ -358,7 +358,7 @@ class AdaptiveMultipleQueue(Adaptive): ...@@ -358,7 +358,7 @@ class AdaptiveMultipleQueue(Adaptive):
This custom implementation extends the `Adaptive.recommendations` by looking This custom implementation extends the `Adaptive.recommendations` by looking
at the `distributed.scheduler.TaskState.resource_restrictions`. at the `distributed.scheduler.TaskState.resource_restrictions`.
The heristics is: The heuristics is:
.. note :: .. note ::
If a certain task has the status `no-worker` and it has resource_restrictions, the scheduler should If a certain task has the status `no-worker` and it has resource_restrictions, the scheduler should
...@@ -446,7 +446,7 @@ class SchedulerResourceRestriction(Scheduler): ...@@ -446,7 +446,7 @@ class SchedulerResourceRestriction(Scheduler):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(SchedulerResourceRestriction, self).__init__( super(SchedulerResourceRestriction, self).__init__(
idle_timeout=300 idle_timeout=3600
if rc.get("bob.pipelines.sge.idle_timeout") is None if rc.get("bob.pipelines.sge.idle_timeout") is None
else rc.get("bob.pipelines.sge.idle_timeout"), else rc.get("bob.pipelines.sge.idle_timeout"),
allowed_failures=100 allowed_failures=100
......
...@@ -50,14 +50,14 @@ default ...@@ -50,14 +50,14 @@ default
""" """
QUEUE_LIGHT = { QUEUE_GPU = {
"default": { "default": {
"queue": "q_1day", "queue": "q_short_gpu",
"memory": "4GB", "memory": "30GB",
"io_big": False, "io_big": False,
"resource_spec": "", "resource_spec": "",
"max_jobs": 96, "max_jobs": 45,
"resources": "", "resources": {"q_short_gpu": 1},
}, },
"q_1day": { "q_1day": {
"queue": "q_1day", "queue": "q_1day",
...@@ -98,6 +98,5 @@ QUEUE_LIGHT = { ...@@ -98,6 +98,5 @@ QUEUE_LIGHT = {
}, },
} }
""" """
This queue setup has a light arrangement. This queue setup uses the q_short_gpu queue of the SGE.
For CPU jobs, it prioritizes all.q and not io_big
""" """
...@@ -44,9 +44,9 @@ setup( ...@@ -44,9 +44,9 @@ setup(
# entry_points defines which scripts will be inside the 'bin' directory # entry_points defines which scripts will be inside the 'bin' directory
entry_points = { entry_points = {
'dask.client': [ 'dask.client': [
'local-parallel = bob.pipelines.config.distributed.local_parallel', 'local-parallel = bob.pipelines.config.distributed.local_parallel:dask_client',
'sge = bob.pipelines.config.distributed.sge_default', 'sge = bob.pipelines.config.distributed.sge_default:dask_client',
'sge-light = bob.pipelines.config.distributed.sge_light', 'sge-gpu = bob.pipelines.config.distributed.sge_gpu:dask_client',
], ],
}, },
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment