From 0b01cdff30cd4f7a29e4883003ebb5a89f51b389 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.anjos@idiap.ch> Date: Thu, 30 Jun 2016 17:37:23 +0200 Subject: [PATCH] [backend] Remove syserr field, use logger instead; Prefer logger.error() as way to contact sysadmins than directly e-mailing --- .../migrations/0003_remove_result_syserr.py | 19 ++++++ beat/web/backend/models.py | 42 +++++------- beat/web/databases/api.py | 15 ++-- beat/web/experiments/models.py | 5 -- beat/web/utils/api.py | 68 ------------------- 5 files changed, 48 insertions(+), 101 deletions(-) create mode 100644 beat/web/backend/migrations/0003_remove_result_syserr.py delete mode 100644 beat/web/utils/api.py diff --git a/beat/web/backend/migrations/0003_remove_result_syserr.py b/beat/web/backend/migrations/0003_remove_result_syserr.py new file mode 100644 index 000000000..4d779b4fd --- /dev/null +++ b/beat/web/backend/migrations/0003_remove_result_syserr.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.9.5 on 2016-06-30 17:35 +from __future__ import unicode_literals + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('backend', '0002_scheduler_addons'), + ] + + operations = [ + migrations.RemoveField( + model_name='result', + name='syserr', + ), + ] diff --git a/beat/web/backend/models.py b/beat/web/backend/models.py index b828c4205..d896979b1 100644 --- a/beat/web/backend/models.py +++ b/beat/web/backend/models.py @@ -448,9 +448,8 @@ class Worker(models.Model): j.job.block.name, j.job.block.experiment.fullname(), ) - logger.warn(message) - j.end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR, - syserr=message)) + logger.error(message) + j.end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR)) # cmdline base argument cmdline = [process] @@ -478,9 +477,9 @@ class Worker(models.Model): "Available environments are `%s'" % \ '|'.join(environments.keys()), ) - logger.warn(message) + logger.error(message) split.end(Result(status=1, - usrerr=settings.DEFAULT_USER_ERROR, syserr=message)) + usrerr=settings.DEFAULT_USER_ERROR)) continue # if we get to this point, then we launch the user process @@ -695,7 +694,6 @@ class Result(models.Model): stdout = models.TextField(null=True, blank=True) stderr = models.TextField(null=True, blank=True) usrerr = models.TextField(null=True, blank=True) - syserr = models.TextField(null=True, blank=True) _stats = models.TextField(null=True, blank=True) timed_out = models.BooleanField(default=False) cancelled = models.BooleanField(default=False) @@ -707,7 +705,6 @@ class Result(models.Model): if self.stdout: retval += ', stdout=' + self.stdout if self.stderr: retval += ', stderr=' + self.stderr if self.usrerr: retval += ', usrerr=' + self.usrerr - if self.syserr: retval += ', syserr=' + self.syserr retval += ')' return retval @@ -879,8 +876,8 @@ class Job(models.Model): message = "Index splitting for block `%s' of experiment " \ "`%s' could not be completed: not splittable!" % \ (self.block.name, self.block.experiment.fullname()) - logger.warn(message) - self._cancel(usrerr=settings.DEFAULT_USER_ERROR, syserr=message) + logger.error(message) + self._cancel(usrerr=settings.DEFAULT_USER_ERROR) # if you get to this point, the splitting has succeeded, # create the necessary splits and assign the ranges @@ -905,8 +902,8 @@ class Job(models.Model): "error: %s" % (self.block.name, self.block.experiment.fullname(), traceback.format_exc()) - logger.warn(message) - self._cancel(usrerr=settings.DEFAULT_USER_ERROR, syserr=message) + logger.error(message) + self._cancel(usrerr=settings.DEFAULT_USER_ERROR) def _cascade_updates(self): @@ -1036,7 +1033,7 @@ class Job(models.Model): os.remove(f) - def _cancel(self, usrerr=None, syserr=None): + def _cancel(self, usrerr=None): '''Cancel the execution of this job As a consequence: delete all associated jobs, mark end_date and set @@ -1055,8 +1052,8 @@ class Job(models.Model): for s in self.splits.all(): s._cancel() else: self.status = Job.CANCELLED - if usrerr or syserr: - r = Result(status=1, usrerr=usrerr, syserr=syserr) + if usrerr: + r = Result(status=1, usrerr=usrerr) r.save() self.result = r self.save() @@ -1078,7 +1075,6 @@ class Job(models.Model): stdout = _merge_strings([k.stdout for k in job_results]) stderr = _merge_strings([k.stderr for k in job_results]) usrerr = _merge_strings([k.usrerr for k in job_results]) - syserr = _merge_strings([k.syserr for k in job_results]) # merge beat.core statistics if job_results: @@ -1092,7 +1088,7 @@ class Job(models.Model): timed_out = any([k.timed_out for k in job_results]) r = Result(status=status, stdout=stdout, stderr=stderr, usrerr=usrerr, - syserr=syserr, timed_out=timed_out, cancelled=cancelled) + timed_out=timed_out, cancelled=cancelled) r.stats = stats r.save() self.result = r @@ -1428,8 +1424,8 @@ class JobSplit(models.Model): (self.split_index+1, self.job.block.required_slots, os.getpid(), self.worker, self.job.block.name, self.job.block.experiment.fullname()) - logger.warn(message) - self.try_end(Result(status=1, syserr=message, + logger.error(message) + self.try_end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR)) config['range'] = [self.start_index, self.end_index] @@ -1481,7 +1477,6 @@ class JobSplit(models.Model): stdout=result['stdout'], stderr=result['stderr'], usrerr=result['user_error'], - syserr=result['system_error'], _stats=simplejson.dumps(result['statistics'], indent=2), )) logger.info("Split `%s' (pid=%d) ended gracefully", self, @@ -1495,15 +1490,14 @@ class JobSplit(models.Model): logger.info("Split `%s' reached the maximum number of IO " \ "errors (%d > %d). Force failing this split." % \ (self, self.cache_errors, settings.MAXIMUM_IO_ERRORS)) + logger.error(traceback.format_exc()) self.try_end(Result(status=1, - usrerr=settings.DEFAULT_USER_ERROR, - syserr=traceback.format_exc(),)) + usrerr=settings.DEFAULT_USER_ERROR)) else: logger.info("Split `%s' will be retried (%d/%d)", self, self.cache_errors, settings.MAXIMUM_IO_ERRORS) except Exception: - logger.warn("Split `%s' (pid=%d) ended with an error: %s", + logger.error("Split `%s' (pid=%d) ended with an error: %s", self, os.getpid(), traceback.format_exc()) - self.try_end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR, - syserr=traceback.format_exc(),)) + self.try_end(Result(status=1, usrerr=settings.DEFAULT_USER_ERROR)) diff --git a/beat/web/databases/api.py b/beat/web/databases/api.py index dd0e9e0d6..31cbf9d10 100644 --- a/beat/web/databases/api.py +++ b/beat/web/databases/api.py @@ -47,7 +47,11 @@ from ..common.api import ListCreateBaseView from ..common.responses import BadRequestResponse from ..common.utils import ensure_html from ..dataformats.serializers import ReferencedDataFormatSerializer -from ..utils.api import report_server_error + +import logging +import traceback +logger = logging.getLogger(__name__) + #---------------------------------------------------------- @@ -222,7 +226,8 @@ class RetrieveDatabaseView(views.APIView): try: result['declaration'] = database.declaration_file.read() except: - return report_server_error() + logger.error(traceback.format_exc()) + return HttpResponse(status=500) # Retrieve the source code @@ -230,7 +235,8 @@ class RetrieveDatabaseView(views.APIView): try: result['code'] = database.source_code_file.read() except: - return report_server_error() + logger.error(traceback.format_exc()) + return HttpResponse(status=500) # Retrieve the description in HTML format @@ -270,4 +276,5 @@ class RetrieveDatabaseView(views.APIView): # Return the result return Response(result) except: - return report_server_error() + logger.error(traceback.format_exc()) + return HttpResponse(status=500) diff --git a/beat/web/experiments/models.py b/beat/web/experiments/models.py index 5e7b5a3d8..b720441fb 100644 --- a/beat/web/experiments/models.py +++ b/beat/web/experiments/models.py @@ -44,7 +44,6 @@ from beat.core.utils import NumpyJSONEncoder from ..algorithms.models import Algorithm from ..toolchains.models import Toolchain -from ..utils.api import send_email_to_administrators from ..common.models import Shareable from ..common.models import ContributionManager @@ -1159,10 +1158,6 @@ class Block(models.Model): self.outputs.update(**info) - if self.job.result.syserr: #mail admins - send_email_to_administrators('System error captured', - self.job.result.syserr) - if self.job.status == Block.SKIPPED: self.status = Block.CACHED else: diff --git a/beat/web/utils/api.py b/beat/web/utils/api.py deleted file mode 100644 index 357001026..000000000 --- a/beat/web/utils/api.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : - -############################################################################### -# # -# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/ # -# Contact: beat.support@idiap.ch # -# # -# This file is part of the beat.web module of the BEAT platform. # -# # -# Commercial License Usage # -# Licensees holding valid commercial BEAT licenses may use this file in # -# accordance with the terms contained in a written agreement between you # -# and Idiap. For further information contact tto@idiap.ch # -# # -# Alternatively, this file may be used under the terms of the GNU Affero # -# Public License version 3 as published by the Free Software and appearing # -# in the file LICENSE.AGPL included in the packaging of this file. # -# The BEAT platform is distributed in the hope that it will be useful, but # -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # -# or FITNESS FOR A PARTICULAR PURPOSE. # -# # -# You should have received a copy of the GNU Affero Public License along # -# with the BEAT platform. If not, see http://www.gnu.org/licenses/. # -# # -############################################################################### - -from django.conf import settings -from django.http import HttpResponse -from django.core.mail import send_mail -import traceback - -import logging -logger = logging.getLogger(__name__) - - -#---------------------------------------------------------- - -def report_server_error(additional_content=None): - try: - subject = settings.EMAIL_SUBJECT_PREFIX + 'Internal server error' - - message = traceback.format_exc() - - if additional_content: - message = additional_content + '\n\n' + message - - send_mail(subject, message, settings.DEFAULT_FROM_EMAIL, - map(lambda x: x[1], settings.ADMINS)) - except Exception: - import traceback - logger.warn("Could not send e-mail to system administrators about " \ - "`%s'. Exception caught: %s", subject, traceback.format_exc()) - - return HttpResponse(status=500) - -#---------------------------------------------------------- - -def send_email_to_administrators(title, content): - try: - subject = settings.EMAIL_SUBJECT_PREFIX + title - - send_mail(subject, content, settings.DEFAULT_FROM_EMAIL, - map(lambda x: x[1], settings.ADMINS)) - except Exception: - import traceback - logger.warn("Could not send e-mail to system administrators about " \ - "`%s'. Exception caught: %s", subject, traceback.format_exc()) -- GitLab