Skip to content
Snippets Groups Projects
Commit 797e3418 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[backend] Fix index splitting; Printouts

parent c1568321
No related branches found
No related tags found
1 merge request: !194 (Scheduler)
......@@ -372,27 +372,28 @@ class Worker(models.Model):
if settings.DEBUG: cmdline += ['-vv']
# start newly assigned job splits
for j in JobSplit.objects.filter(worker=self, status=Job.QUEUED,
for split in JobSplit.objects.filter(worker=self, status=Job.QUEUED,
start_date__isnull=True, process_id__isnull=True):
execute = pick_execute(j, environments)
execute = pick_execute(split, environments)
if execute is None:
message = "Environment `%s' is not available for split " \
"%d/%d running at worker `%s', for block `%s' of " \
"experiment `%s': %s" % \
(self.job.block.environment,
self.split_index+1,
self.job.block.required_slots,
self.worker,
self.job.block.name,
self.job.block.experiment.fullname(),
(split.job.block.environment,
split.split_index+1,
split.job.block.required_slots,
self,
split.job.block.name,
split.job.block.experiment.fullname(),
"Available environments are `%s'" % \
'|'.join(environments.keys()),
)
j.end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR,
split.end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR,
syserr=message))
continue
# if we get to this point, then we launch the user process
subprocess.Popen(cmdline + [execute, str(j.pk)])
subprocess.Popen(cmdline + [execute, str(split.pk)])
def __enter__(self):
......@@ -573,7 +574,11 @@ class Slot(models.Model):
def _merge_strings(s):
if len(s) == 1: return s[0]
return '\n'.join(['Process %d: %s' % (i,k) for i,k in enumerate(s)])
s = [k.strip() for k in s]
if any(s):
return '\n'.join(['Process %d: %s' % (i,k) for i,k in enumerate(s)])
else:
return ''
#----------------------------------------------------------
......@@ -783,7 +788,7 @@ class Job(models.Model):
job=self,
split_index=i,
start_index=split_indices[0],
end_index=split_indices[0],
end_index=split_indices[1],
)
s.save()
......@@ -1377,7 +1382,8 @@ class JobSplit(models.Model):
self.signal_io_error()
if self.cache_errors > settings.MAXIMUM_IO_ERRORS:
self.try_end(Result(status=1,
usrerr=settings.DEFAULT_USER_ERROR, syserr=format_exc(),))
usrerr=settings.DEFAULT_USER_ERROR,
syserr=traceback.format_exc(),))
else:
logger.info("Split `%s' will be retried (%d/%d)",
self, self.cache_errors, settings.MAXIMUM_IO_ERRORS)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment