From 2a712e438de6e2e141dad54d3bab6c9afc798ddd Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.anjos@idiap.ch> Date: Thu, 23 Jun 2016 17:24:26 +0200 Subject: [PATCH] [backend] Improve the handling of splits to be cancelled --- beat/web/backend/models.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/beat/web/backend/models.py b/beat/web/backend/models.py index 052390ffb..540d199b3 100644 --- a/beat/web/backend/models.py +++ b/beat/web/backend/models.py @@ -434,9 +434,22 @@ class Worker(models.Model): # cancel job splits by killing associated processes for j in JobSplit.objects.filter(worker=self, status=Job.CANCEL, - end_date__isnull=True, process_id__isnull=False): - if psutil.pid_exists(j.process_id): + end_date__isnull=True): + if j.process_id is not None and psutil.pid_exists(j.process_id): os.kill(j.process_id, signal.SIGTERM) + else: # process went away without any apparent reason + with transaction.atomic(): + message = "Split %d/%d running at worker `%s' for " \ + "block `%s' of experiment `%s' finished without any " \ + "apparent reason. Checking-out job split at " \ + "database by force..." % (j.split_index+1, + j.job.block.required_slots, + self, + j.job.block.name, + j.job.block.experiment.fullname(), + ) + j.end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR, + syserr=message)) # cmdline base argument cmdline = [process] -- GitLab