diff --git a/bob/pipelines/config/distributed/sge_default.py b/bob/pipelines/config/distributed/sge_default.py index 45d9b5a88d8a39fb629195def198e14d3426d55e..9af2f1f68b7d0d7c643c6c9b8a7dfdfddfe104b3 100644 --- a/bob/pipelines/config/distributed/sge_default.py +++ b/bob/pipelines/config/distributed/sge_default.py @@ -2,5 +2,5 @@ from dask.distributed import Client from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster -cluster = SGEMultipleQueuesCluster(min_jobs=20) +cluster = SGEMultipleQueuesCluster(min_jobs=1) dask_client = Client(cluster) diff --git a/bob/pipelines/config/distributed/sge_light.py b/bob/pipelines/config/distributed/sge_gpu.py similarity index 50% rename from bob/pipelines/config/distributed/sge_light.py rename to bob/pipelines/config/distributed/sge_gpu.py index aa58cc2ba7ae809a5abffcc7814825c73d079358..74998652d5f6980a0769d7232882e0c845c37f7f 100644 --- a/bob/pipelines/config/distributed/sge_light.py +++ b/bob/pipelines/config/distributed/sge_gpu.py @@ -1,7 +1,7 @@ from dask.distributed import Client from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster -from bob.pipelines.distributed.sge_queues import QUEUE_LIGHT +from bob.pipelines.distributed.sge_queues import QUEUE_GPU -cluster = SGEMultipleQueuesCluster(min_jobs=20, sge_job_spec=QUEUE_LIGHT) +cluster = SGEMultipleQueuesCluster(min_jobs=1, sge_job_spec=QUEUE_GPU) dask_client = Client(cluster) diff --git a/bob/pipelines/distributed/sge.py b/bob/pipelines/distributed/sge.py index 59befacaf30b0bdae2b16e6118e19ac9a2aa91e8..441a56914e34a31f835b4229266a48d0508989bb 100644 --- a/bob/pipelines/distributed/sge.py +++ b/bob/pipelines/distributed/sge.py @@ -109,7 +109,7 @@ def get_max_jobs(queue_dict): class SGEMultipleQueuesCluster(JobQueueCluster): """Launch Dask jobs in the SGE cluster allowing the request of multiple - queus. + queues. 
Parameters ---------- @@ -127,7 +127,7 @@ class SGEMultipleQueuesCluster(JobQueueCluster): sge_job_spec: dict Dictionary containing a minimum specification for the qsub command. - It cosists of: + It consists of: queue: SGE queue memory: Memory requirement in GB (e.g. 4GB) @@ -358,7 +358,7 @@ class AdaptiveMultipleQueue(Adaptive): This custom implementation extends the `Adaptive.recommendations` by looking at the `distributed.scheduler.TaskState.resource_restrictions`. - The heristics is: + The heuristic is: .. note :: If a certain task has the status `no-worker` and it has resource_restrictions, the scheduler should @@ -446,7 +446,7 @@ class SchedulerResourceRestriction(Scheduler): def __init__(self, *args, **kwargs): super(SchedulerResourceRestriction, self).__init__( - idle_timeout=300 + idle_timeout=3600 if rc.get("bob.pipelines.sge.idle_timeout") is None else rc.get("bob.pipelines.sge.idle_timeout"), allowed_failures=100 diff --git a/bob/pipelines/distributed/sge_queues.py b/bob/pipelines/distributed/sge_queues.py index e8b5920b2124b287b7c81a10e85a62a8b27d7e3f..825b2e9f5fe975b23dc9ee866c8f92c1881be18a 100644 --- a/bob/pipelines/distributed/sge_queues.py +++ b/bob/pipelines/distributed/sge_queues.py @@ -50,14 +50,14 @@ default """ -QUEUE_LIGHT = { +QUEUE_GPU = { "default": { - "queue": "q_1day", - "memory": "4GB", + "queue": "q_short_gpu", + "memory": "30GB", "io_big": False, "resource_spec": "", - "max_jobs": 96, - "resources": "", + "max_jobs": 45, + "resources": {"q_short_gpu": 1}, }, "q_1day": { "queue": "q_1day", @@ -98,6 +98,5 @@ QUEUE_LIGHT = { }, } """ -This queue setup has a light arrangement. -For CPU jobs, it prioritizes all.q and not io_big +This queue setup uses the q_short_gpu queue of the SGE. 
""" diff --git a/setup.py b/setup.py index b3829f2f81c51c831dcf7ddf12f4654f8b9f7093..7de28d78161e83eeece19d6f27807e0f5a28badd 100644 --- a/setup.py +++ b/setup.py @@ -44,9 +44,9 @@ setup( # entry_points defines which scripts will be inside the 'bin' directory entry_points = { 'dask.client': [ - 'local-parallel = bob.pipelines.config.distributed.local_parallel', - 'sge = bob.pipelines.config.distributed.sge_default', - 'sge-light = bob.pipelines.config.distributed.sge_light', + 'local-parallel = bob.pipelines.config.distributed.local_parallel:dask_client', + 'sge = bob.pipelines.config.distributed.sge_default:dask_client', + 'sge-gpu = bob.pipelines.config.distributed.sge_gpu:dask_client', ], },