diff --git a/beat/web/backend/tests.py b/beat/web/backend/tests.py index 140fc81c3fc0f73e6d6986dd90529ece66992a71..977a2384276731419b4e2343b3fc4575142f1d0b 100644 --- a/beat/web/backend/tests.py +++ b/beat/web/backend/tests.py @@ -1864,6 +1864,57 @@ class Scheduling(BaseBackendTestCase): self.assertNotEqual(assigned_splits[0], assigned_splits[1]) + def test_cancel_concurrent_job(self): + + # tests a simple scheduling activity in which two jobs of the same + # experiment must be scheduled concurrently, provided there is enough + # space. Then, fails one of them, waits the experiment to fail + # completely. Processing jobs must be cancelled. + + current_stats = HourlyStatistics.objects.count() + + fullname = 'user/user/triangle/1/triangle' + xp = Experiment.objects.get(name=fullname.split(os.sep)[-1]) + + worker = Worker.objects.get() + + # schedules the experiment and check it + xp.schedule() + xp.refresh_from_db() + self.assertEqual(xp.status, Experiment.SCHEDULED) + + assigned_splits = schedule() + + self.assertEqual(len(assigned_splits), 2) + self.assertEqual(assigned_splits[0].job.block.experiment, xp) + self.assertEqual(assigned_splits[1].job.block.experiment, xp) + self.assertNotEqual(assigned_splits[0], assigned_splits[1]) + + + # simulate job start on worker + assigned_splits[0].start() + assigned_splits[1].start() + + # now fail one of the jobs, the end result is the experiment fails + assigned_splits[1].end(Result(status=15)) #simulated sigterm sent + self.assertEqual(assigned_splits[1].job.status, Job.FAILED) + + # cancels the job which is marked for cancelling, checks the final + # experiment state is as expected (this is the worker job) + self.assertEqual(assigned_splits[0].job.splits.first().status, + Job.CANCEL) + assigned_splits[0].job.splits.first().end(None, Job.CANCELLED) + + xp.refresh_from_db() + self.assertEqual( + [str(k) for k in xp.blocks.order_by('id').values_list('status', + flat=True)], + [Block.CANCELLED, Block.FAILED, Block.CANCELLED, Block.CANCELLED] + ) + self.assertEqual(xp.status, Experiment.FAILED) + + + class SchedulingPriority(BaseBackendTestCase): @@ -2645,7 +2696,7 @@ class WorkingExternally(TransactionTestCase): def test_cancel_running(self): - # tests an experiment can actually be run + # tests an experiment can be cancelled while running current_stats = HourlyStatistics.objects.count() diff --git a/beat/web/experiments/models.py b/beat/web/experiments/models.py index c9a88c8ddc736f31bdd606cfd80b2045c90d9fab..c9480bc1dea89d07238f35face67ae5f3ce763c8 100644 --- a/beat/web/experiments/models.py +++ b/beat/web/experiments/models.py @@ -1090,6 +1090,12 @@ class Block(models.Model): b._cancel() if b.is_runnable(): b.job._make_runnable() + # Update eventual running siblings in case of a failure + if self.status == Block.FAILED: + for b in Block.objects.filter(experiment=self.experiment, + status=Block.PROCESSING): + b._cancel() + def _update_state(self, timings=None): '''Updates self state as a result of backend running