Skip to content
Snippets Groups Projects
Commit 2a712e43 authored by André Anjos :speech_balloon:
Browse files

[backend] Improve the handling of splits to be cancelled

parent e8d1602b
No related branches found
No related tags found
No related merge requests found
Pipeline #
...@@ -434,9 +434,22 @@ class Worker(models.Model): ...@@ -434,9 +434,22 @@ class Worker(models.Model):
# cancel job splits by killing associated processes # cancel job splits by killing associated processes
for j in JobSplit.objects.filter(worker=self, status=Job.CANCEL, for j in JobSplit.objects.filter(worker=self, status=Job.CANCEL,
end_date__isnull=True, process_id__isnull=False): end_date__isnull=True):
if psutil.pid_exists(j.process_id): if j.process_id is not None and psutil.pid_exists(j.process_id):
os.kill(j.process_id, signal.SIGTERM) os.kill(j.process_id, signal.SIGTERM)
else: # process went away without any apparent reason
with transaction.atomic():
message = "Split %d/%d running at worker `%s' for " \
"block `%s' of experiment `%s' finished without any " \
"apparent reason. Checking-out job split at " \
"database by force..." % (j.split_index+1,
j.job.block.required_slots,
self,
j.job.block.name,
j.job.block.experiment.fullname(),
)
j.end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR,
syserr=message))
# cmdline base argument # cmdline base argument
cmdline = [process] cmdline = [process]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment