diff --git a/beat/web/backend/models.py b/beat/web/backend/models.py index 77fe9cd7d28fa7f89f92cf20fd3f55c003245413..0bdda08a2ed0c007c1335c6276fffacadf0f218f 100644 --- a/beat/web/backend/models.py +++ b/beat/web/backend/models.py @@ -372,27 +372,28 @@ class Worker(models.Model): if settings.DEBUG: cmdline += ['-vv'] # start newly assigned job splits - for j in JobSplit.objects.filter(worker=self, status=Job.QUEUED, + for split in JobSplit.objects.filter(worker=self, status=Job.QUEUED, start_date__isnull=True, process_id__isnull=True): - execute = pick_execute(j, environments) + execute = pick_execute(split, environments) if execute is None: message = "Environment `%s' is not available for split " \ "%d/%d running at worker `%s', for block `%s' of " \ "experiment `%s': %s" % \ - (self.job.block.environment, - self.split_index+1, - self.job.block.required_slots, - self.worker, - self.job.block.name, - self.job.block.experiment.fullname(), + (split.job.block.environment, + split.split_index+1, + split.job.block.required_slots, + self, + split.job.block.name, + split.job.block.experiment.fullname(), "Available environments are `%s'" % \ '|'.join(environments.keys()), ) - j.end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR, + split.end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR, syserr=message)) + continue # if we get to this point, then we launch the user process - subprocess.Popen(cmdline + [execute, str(j.pk)]) + subprocess.Popen(cmdline + [execute, str(split.pk)]) def __enter__(self): @@ -573,7 +574,11 @@ class Slot(models.Model): def _merge_strings(s): if len(s) == 1: return s[0] - return '\n'.join(['Process %d: %s' % (i,k) for i,k in enumerate(s)]) + s = [k.strip() for k in s] + if any(s): + return '\n'.join(['Process %d: %s' % (i,k) for i,k in enumerate(s)]) + else: + return '' #---------------------------------------------------------- @@ -783,7 +788,7 @@ class Job(models.Model): job=self, split_index=i, start_index=split_indices[0], - end_index=split_indices[0], + end_index=split_indices[1], ) s.save() @@ -1377,7 +1382,8 @@ class JobSplit(models.Model): self.signal_io_error() if self.cache_errors > settings.MAXIMUM_IO_ERRORS: self.try_end(Result(status=1, - usrerr=settings.DEFAULT_USER_ERROR, syserr=format_exc(),)) + usrerr=settings.DEFAULT_USER_ERROR, + syserr=traceback.format_exc(),)) else: logger.info("Split `%s' will be retried (%d/%d)", self, self.cache_errors, settings.MAXIMUM_IO_ERRORS)