From 4b3a4d9191bc98056928c535d017d1670b16e178 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Sun, 8 May 2016 20:51:06 +0200 Subject: [PATCH] [backend] Implement more robust shutdown strategy for workers --- beat/web/backend/models.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/beat/web/backend/models.py b/beat/web/backend/models.py index 0bdda08a2..fce438433 100644 --- a/beat/web/backend/models.py +++ b/beat/web/backend/models.py @@ -302,30 +302,33 @@ class Worker(models.Model): ''' + # disables worker, so no more splits can be assigned to it + with transaction.atomic(): + self_ = Worker.objects.select_for_update().get(pk=self.pk) + self_.active = False + self_.used_cores = 0 + self_.used_memory = 0 + self_.info = 'Worker shutdown by system administrator' + self_.save() + message = 'Cancelled on forced worker shutdown (maintenance)' \ ' - you may retry submitting your experiment shortly' - self.refresh_from_db() - - # cancel job splits which are running - for j in JobSplit.objects.filter(worker=self, - status__in=(Job.CANCEL, Job.PROCESSING), end_date__isnull=True, - process_id__isnull=False): - if psutil.pid_exists(j.process_id): - os.kill(j.process_id, signal.SIGTERM) - # cancel job splits which were not yet started for j in JobSplit.objects.filter(worker=self, status=Job.QUEUED, start_date__isnull=True, process_id__isnull=True): j.end(Result(status=1, usrerr=message)) - with transaction.atomic(): - self_ = Worker.objects.select_for_update().get(pk=self.pk) - self.active = False - self.used_cores = 0 - self.used_memory = 0 - self.info = 'Worker shutdown by system administrator' - self.save() + # cancel job splits which are running + for j in JobSplit.objects.filter(worker=self, status=Job.PROCESSING, + end_date__isnull=True, process_id__isnull=False): + j._cancel() + + # cancel job splits which should be cancelled + for j in JobSplit.objects.filter(worker=self, status=Job.CANCEL, + end_date__isnull=True, process_id__isnull=False): + if psutil.pid_exists(j.process_id): + os.kill(j.process_id, signal.SIGTERM) def work(self, environments, cpulimit, process): -- GitLab