From 4b3a4d9191bc98056928c535d017d1670b16e178 Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Sun, 8 May 2016 20:51:06 +0200
Subject: [PATCH] [backend] Implement more robust shutdown strategy for workers

---
 beat/web/backend/models.py | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/beat/web/backend/models.py b/beat/web/backend/models.py
index 0bdda08a2..fce438433 100644
--- a/beat/web/backend/models.py
+++ b/beat/web/backend/models.py
@@ -302,30 +302,33 @@ class Worker(models.Model):
 
         '''
 
+        # disables worker, so no more splits can be assigned to it
+        with transaction.atomic():
+            self_ = Worker.objects.select_for_update().get(pk=self.pk)
+            self_.active = False
+            self_.used_cores = 0
+            self_.used_memory = 0
+            self_.info = 'Worker shutdown by system administrator'
+            self_.save()
+
         message = 'Cancelled on forced worker shutdown (maintenance)' \
             ' - you may retry submitting your experiment shortly'
 
-        self.refresh_from_db()
-
-        # cancel job splits which are running
-        for j in JobSplit.objects.filter(worker=self,
-            status__in=(Job.CANCEL, Job.PROCESSING), end_date__isnull=True,
-            process_id__isnull=False):
-            if psutil.pid_exists(j.process_id):
-                os.kill(j.process_id, signal.SIGTERM)
-
         # cancel job splits which were not yet started
         for j in JobSplit.objects.filter(worker=self, status=Job.QUEUED,
             start_date__isnull=True, process_id__isnull=True):
             j.end(Result(status=1, usrerr=message))
 
-        with transaction.atomic():
-            self_ = Worker.objects.select_for_update().get(pk=self.pk)
-            self.active = False
-            self.used_cores = 0
-            self.used_memory = 0
-            self.info = 'Worker shutdown by system administrator'
-            self.save()
+        # cancel job splits which are running
+        for j in JobSplit.objects.filter(worker=self, status=Job.PROCESSING,
+            end_date__isnull=True, process_id__isnull=False):
+            j._cancel()
+
+        # cancel job splits which should be cancelled
+        for j in JobSplit.objects.filter(worker=self, status=Job.CANCEL,
+            end_date__isnull=True, process_id__isnull=False):
+            if psutil.pid_exists(j.process_id):
+                os.kill(j.process_id, signal.SIGTERM)
 
 
     def work(self, environments, cpulimit, process):
-- 
GitLab