Merge branch 'dask-defaults' into 'master'

Better defaults for the .adapt method. See merge request !23.

Merge branch 'dask-defaults' into 'master'
c8416be2 · Tiago de Freitas Pereira · db0c0bae · 5b8ffd65 · c8416be2 · c8416be2
Commit c8416be2 authored 5 years ago by Tiago de Freitas Pereira
--- a/bob/pipelines/config/distributed/sge_iobig_16cores.py
+++ b/bob/pipelines/config/distributed/sge_iobig_16cores.py
@@ -14,6 +14,14 @@ Q_1DAY_IO_BIG_SPEC = {

 n_jobs = 16
 cluster = SGEIdiapCluster(sge_job_spec=Q_1DAY_IO_BIG_SPEC)
-cluster.scale(n_jobs)
+cluster.scale(1)
+# Adapting from a minimum of 1 job to a maximum of n_jobs (16) jobs
+# interval: Milliseconds between checks from the scheduler
+# wait_count: Number of consecutive times that a worker should be suggested for 
+#             removal before we remove it.
+#             Here the goal is to wait 2 minutes before scaling down since
+#             it is very expensive to get jobs on the SGE grid
+cluster.adapt(minimum=1, maximum=n_jobs, wait_count=120, interval=1000)
+

 dask_client = Client(cluster)
--- a/bob/pipelines/config/distributed/sge_iobig_16cores_1gpu.py
+++ b/bob/pipelines/config/distributed/sge_iobig_16cores_1gpu.py
@@ -2,9 +2,15 @@ from bob.pipelines.distributed.sge import SGEIdiapCluster, Q_1DAY_GPU_SPEC
 from dask.distributed import Client

 n_jobs = 16
-n_gpu_jobs = 1
 cluster = SGEIdiapCluster(sge_job_spec=Q_1DAY_GPU_SPEC)
-cluster.scale(n_jobs, sge_job_spec_key="default")
-cluster.scale(n_gpu_jobs, sge_job_spec_key="gpu")
+cluster.scale(1, sge_job_spec_key="gpu")
+cluster.scale(2, sge_job_spec_key="default")
+# Adapting from a minimum of 1 job to a maximum of n_jobs (16) jobs
+# interval: Milliseconds between checks from the scheduler
+# wait_count: Number of consecutive times that a worker should be suggested for 
+#             removal before we remove it.
+#             Here the goal is to wait 2 minutes before scaling down since
+#             it is very expensive to get jobs on the SGE grid
+cluster.adapt(minimum=1, maximum=n_jobs, wait_count=120, interval=1000)

 dask_client = Client(cluster)
--- a/bob/pipelines/config/distributed/sge_iobig_48cores.py
+++ b/bob/pipelines/config/distributed/sge_iobig_48cores.py
@@ -3,6 +3,15 @@ from dask.distributed import Client

 n_jobs = 48
 cluster = SGEIdiapCluster(sge_job_spec=Q_1DAY_IO_BIG_SPEC)
-cluster.scale(n_jobs)
+cluster.scale(10)
+
+# Adapting from a minimum of 10 jobs to a maximum of n_jobs (48) jobs
+# interval: Milliseconds between checks from the scheduler
+# wait_count: Number of consecutive times that a worker should be suggested for 
+#             removal before we remove it.
+#             Here the goal is to wait 2 minutes before scaling down since
+#             it is very expensive to get jobs on the SGE grid
+cluster.adapt(minimum=10, maximum=n_jobs, wait_count=120, interval=1000)
+

 dask_client = Client(cluster)