From 9ebe6228a7d150369f3c78a7e4136629ae9dba5c Mon Sep 17 00:00:00 2001
From: Manuel Gunther <siebenkopf@googlemail.com>
Date: Thu, 31 Mar 2016 12:17:50 -0600
Subject: [PATCH] Added time stamps for submission, start end finish time

---
 doc/manual.rst           |  3 +++
 gridtk/manager.py        | 10 +++++++---
 gridtk/models.py         | 34 +++++++++++++++++++++++++++++++++-
 gridtk/script/jman.py    |  3 ++-
 gridtk/tests/__init__.py | 23 ++++++++++++++++++++++-
 version.txt              |  2 +-
 6 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/doc/manual.rst b/doc/manual.rst
index 9fb0273..532eae3 100644
--- a/doc/manual.rst
+++ b/doc/manual.rst
@@ -159,6 +159,9 @@ You can use it to select a range of job ids, e.g., ``-j 1-4 6-8``.
 In this case, please assert that there are no spaces between job ids and the ``-`` separator.
 If any job id is specified, which is not available in the database, it will simply be ignored, including job ids that in the ranges.
 
+Since version 1.3.0, GridTK also saves timing information about jobs, i.e., time stamps when jobs were submitted, started and finished.
+You can use the ``-t`` option of ``jman ls`` to add the time stamps to the listing, which are both written for jobs and parametric jobs (i.e., when using the ``-a`` option).
+
 
 Inspecting log files
 --------------------
diff --git a/gridtk/manager.py b/gridtk/manager.py
index cfc852b..30224a9 100644
--- a/gridtk/manager.py
+++ b/gridtk/manager.py
@@ -4,7 +4,7 @@ from __future__ import print_function
 import os, sys
 import subprocess
 import socket # to get the host name
-from .models import Base, Job, ArrayJob, Status
+from .models import Base, Job, ArrayJob, Status, times
 from .tools import logger
 
 
@@ -200,7 +200,7 @@ class JobManager:
         self.unlock()
 
 
-  def list(self, job_ids, print_array_jobs = False, print_dependencies = False, long = False, status=Status, names=None, ids_only=False):
+  def list(self, job_ids, print_array_jobs = False, print_dependencies = False, long = False, print_times = False, status=Status, names=None, ids_only=False):
     """Lists the jobs currently added to the database."""
     # configuration for jobs
     if print_dependencies:
@@ -231,7 +231,6 @@ class JobManager:
       print('  '.join(header))
       print(delimiter)
 
-
     self.lock()
     for job in self.get_jobs(job_ids):
       job.refresh()
@@ -240,11 +239,16 @@ class JobManager:
           print(job.unique, end=" ")
         else:
           print(job.format(format, dependency_length, None if long else 43))
+        if print_times:
+          print(times(job))
+
         if (not ids_only) and print_array_jobs and job.array:
           print(array_delimiter)
           for array_job in job.array:
             if array_job.status in status:
               print(array_job.format(array_format))
+              if print_times:
+                print(times(array_job))
           print(array_delimiter)
 
     self.unlock()
diff --git a/gridtk/models.py b/gridtk/models.py
index 600a67e..f641683 100644
--- a/gridtk/models.py
+++ b/gridtk/models.py
@@ -1,11 +1,12 @@
 import sqlalchemy
-from sqlalchemy import Table, Column, Integer, String, Boolean, ForeignKey
+from sqlalchemy import Table, Column, Integer, DateTime, String, Boolean, ForeignKey
 from sqlalchemy.orm import backref
 from sqlalchemy.ext.declarative import declarative_base
 from .tools import Enum, relationship
 
 import os
 import sys
+from datetime import datetime
 
 if sys.version_info[0] >= 3:
   from pickle import dumps, loads
@@ -29,6 +30,10 @@ class ArrayJob(Base):
   result = Column(Integer)
   machine_name = Column(String(10))
 
+  submit_time = Column(DateTime)
+  start_time = Column(DateTime)
+  finish_time = Column(DateTime)
+
   job = relationship("Job", backref='array', order_by=id)
 
   def __init__(self, id, job_id):
@@ -38,6 +43,11 @@ class ArrayJob(Base):
     self.result = None
     self.machine_name = None # will be set later, by the Job class
 
+    self.submit_time = datetime.now()
+    self.start_time = None
+    self.finish_time = None
+
+
   def std_out_file(self):
     return self.job.std_out_file() + "." + str(self.id) if self.job.log_dir else None
 
@@ -76,6 +86,11 @@ class Job(Base):
   array_string = Column(String(255))           # The array string (only needed for re-submission)
   stop_on_failure = Column(Boolean)            # An indicator whether to stop depending jobs when this job finishes with an error
 
+  submit_time = Column(DateTime)
+  start_time = Column(DateTime)
+  finish_time = Column(DateTime)
+
+
   status = Column(Enum(*Status))
   result = Column(Integer)
 
@@ -105,6 +120,9 @@ class Job(Base):
       array_job.result = None
       array_job.machine_name = None
     self.id = self.unique
+    self.submit_time = datetime.now()
+    self.start_time = None
+    self.finish_time = None
 
 
   def queue(self, new_job_id = None, new_job_name = None, queue_name = None):
@@ -148,8 +166,11 @@ class Job(Base):
           array_job.status = 'executing'
           if machine_name is not None:
             array_job.machine_name = machine_name
+            array_job.start_time = datetime.now()
     elif machine_name is not None:
       self.machine_name = machine_name
+    if self.start_time is None:
+      self.start_time = datetime.now()
 
     # sometimes, the 'finish' command did not work for array jobs,
     # so check if any old job still has the 'executing' flag set
@@ -169,6 +190,7 @@ class Job(Base):
         if array_job.id == array_id:
           array_job.status = new_status
           array_job.result = result
+          array_job.finish_time = datetime.now()
         if array_job.status not in ('success', 'failure'):
           finished = False
         elif new_result == 0:
@@ -178,6 +200,7 @@ class Job(Base):
       # There was no array job, or all array jobs finished
       self.status = 'success' if new_result == 0 else 'failure'
       self.result = new_result
+      self.finish_time = datetime.now()
 
       # update all waiting jobs
       for job in self.get_jobs_waiting_for_us():
@@ -362,3 +385,12 @@ def add_job(session, command_line, name = 'job', dependencies = [], array = None
   session.commit()
 
   return job
+
+def times(job):
+  """Returns a string containing timing information for teh given job, which might be a :py:class:`Job` or an :py:class:`ArrayJob`."""
+  timing = "Submitted: %s" % job.submit_time.ctime()
+  if job.start_time is not None:
+    timing += "\nStarted  : %s \t Job waited  : %s" % (job.start_time.ctime(), job.start_time - job.submit_time)
+  if job.finish_time is not None:
+    timing += "\nFinished : %s \t Job executed: %s" % (job.finish_time.ctime(), job.finish_time - job.start_time)
+  return timing
diff --git a/gridtk/script/jman.py b/gridtk/script/jman.py
index b09ce8b..c1ca120 100644
--- a/gridtk/script/jman.py
+++ b/gridtk/script/jman.py
@@ -179,7 +179,7 @@ def run_scheduler(args):
 def list(args):
   """Lists the jobs in the given database."""
   jm = setup(args)
-  jm.list(job_ids=get_ids(args.job_ids), print_array_jobs=args.print_array_jobs, print_dependencies=args.print_dependencies, status=args.status, long=args.long, ids_only=args.ids_only, names=args.names)
+  jm.list(job_ids=get_ids(args.job_ids), print_array_jobs=args.print_array_jobs, print_dependencies=args.print_dependencies, status=args.status, long=args.long, print_times=args.print_times, ids_only=args.ids_only, names=args.names)
 
 
 def communicate(args):
@@ -322,6 +322,7 @@ def main(command_line_options = None):
   list_parser.add_argument('-n', '--names', metavar='NAME', nargs='+', help='List only the jobs with the given names (by default, all jobs are listed)')
   list_parser.add_argument('-a', '--print-array-jobs', action='store_true', help='Also list the array ids.')
   list_parser.add_argument('-l', '--long', action='store_true', help='Prints additional information about the submitted job.')
+  list_parser.add_argument('-t', '--print-times', action='store_true', help='Prints timing information on when jobs were submited, executed and finished')
   list_parser.add_argument('-x', '--print-dependencies', action='store_true', help='Print the dependencies of the jobs as well.')
   list_parser.add_argument('-o', '--ids-only', action='store_true', help='Prints ONLY the job ids (so that they can be parsed by automatic scripts).')
   list_parser.add_argument('-s', '--status', nargs='+', choices = Status, default = Status, help='Delete only jobs that have the given statuses; by default all jobs are deleted.')
diff --git a/gridtk/tests/__init__.py b/gridtk/tests/__init__.py
index d618f79..c5568c9 100644
--- a/gridtk/tests/__init__.py
+++ b/gridtk/tests/__init__.py
@@ -60,7 +60,7 @@ class GridTKTest(unittest.TestCase):
       print()
       # test that the list command works (should also work with the "default" grid manager
       jman.main(['./bin/jman', '--database', self.database, 'list', '--job-ids', '1'])
-      jman.main(['./bin/jman', '--database', self.database, 'list', '--job-ids', '2', '--print-array-jobs', '--print-dependencies'])
+      jman.main(['./bin/jman', '--database', self.database, 'list', '--job-ids', '2', '--print-array-jobs', '--print-dependencies', '--print-times'])
 
       # get insight into the database
       job_manager = gridtk.local.JobManagerLocal(database=self.database)
@@ -74,6 +74,12 @@ class GridTKTest(unittest.TestCase):
       self.assertEqual(jobs[0].status, 'submitted')
       self.assertEqual(jobs[1].status, 'submitted')
       self.assertEqual(jobs[2].status, 'submitted')
+      self.assertTrue(all(j.submit_time is not None for j in jobs))
+      self.assertTrue(all(j.start_time is None for j in jobs))
+      self.assertTrue(all(j.finish_time is None for j in jobs))
+      self.assertTrue(all(j.submit_time is not None for j in jobs[1].array))
+      self.assertTrue(all(j.start_time is None for j in jobs[1].array))
+      self.assertTrue(all(j.finish_time is None for j in jobs[1].array))
 
       # check that the job dependencies are correct
       waiting = jobs[0].get_jobs_waiting_for_us()
@@ -107,6 +113,13 @@ class GridTKTest(unittest.TestCase):
       self.assertEqual(jobs[0].status, 'failure')
       self.assertEqual(jobs[1].status, 'queued')
       self.assertEqual(jobs[2].status, 'waiting')
+      self.assertTrue(jobs[0].start_time is not None)
+      self.assertTrue(jobs[0].finish_time is not None)
+      self.assertTrue(jobs[1].start_time is None)
+      self.assertTrue(jobs[1].finish_time is None)
+      self.assertTrue(jobs[2].start_time is None)
+      self.assertTrue(jobs[2].finish_time is None)
+
       # the result files should already be there
       self.assertTrue(os.path.exists(jobs[0].std_out_file()))
       self.assertTrue(os.path.exists(jobs[0].std_err_file()))
@@ -193,6 +206,14 @@ class GridTKTest(unittest.TestCase):
         self.assertEqual(jobs[1].array[i].result, 0)
       self.assertEqual(jobs[2].status, 'success')
       self.assertEqual(jobs[2].result, 0)
+
+      self.assertTrue(all(j.submit_time is not None for j in jobs))
+      self.assertTrue(all(j.start_time is not None for j in jobs))
+      self.assertTrue(all(j.finish_time is not None for j in jobs))
+      self.assertTrue(all(j.submit_time is not None for j in jobs[1].array))
+      self.assertTrue(all(j.start_time is not None for j in jobs[1].array))
+      self.assertTrue(all(j.finish_time is not None for j in jobs[1].array))
+
       job_manager.unlock()
 
       print()
diff --git a/version.txt b/version.txt
index e447d7d..94bfba8 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-1.3.0b0
+1.3.0b1
-- 
GitLab