Commit ef932dbc authored by Amir MOHAMMADI

Do not automatically remove G from the GPU memory requirements

parent 46608952
Pipeline #29294 passed with stage in 5 minutes
...@@ -24,7 +24,7 @@ from ..tools import make_shell, logger ...@@ -24,7 +24,7 @@ from ..tools import make_shell, logger
from .. import local, sge from .. import local, sge
from ..models import Status from ..models import Status
QUEUES = ['all.q', 'q1d', 'q1w', 'q1m', 'q1dm', 'q1wm', 'gpu', 'lgpu', 'sgpu'] QUEUES = ['all.q', 'q1d', 'q1w', 'q1m', 'q1dm', 'q1wm', 'gpu', 'lgpu', 'sgpu', 'gpum']
def setup(args): def setup(args):
"""Returns the JobManager and sets up the basic infrastructure""" """Returns the JobManager and sets up the basic infrastructure"""
...@@ -130,16 +130,12 @@ def submit(args): ...@@ -130,16 +130,12 @@ def submit(args):
if args.qname != 'all.q': kwargs['hvmem'] = args.memory if args.qname != 'all.q': kwargs['hvmem'] = args.memory
# if this is a GPU queue and args.memory is provided, we set gpumem flag # if this is a GPU queue and args.memory is provided, we set gpumem flag
# remove 'G' last character from the args.memory string # remove 'G' last character from the args.memory string
if args.qname in ('gpu', 'lgpu', 'sgpu') and args.memory is not None: if args.qname in ('gpu', 'lgpu', 'sgpu', 'gpum') and args.memory is not None:
# allow args.memory to have either <num>G or <num> format kwargs['gpumem'] = args.memory
if args.memory.isdigit():
kwargs['gpumem'] = args.memory # assign directly
elif args.memory.endswith('G'):
kwargs['gpumem'] = args.memory[:-1] # remove G at the end
# don't set these for GPU processing or the maximum virtual memroy will be # don't set these for GPU processing or the maximum virtual memroy will be
# set on ulimit # set on ulimit
if 'memfree' in kwargs: del kwargs['memfree'] kwargs.pop('memfree', None)
if 'hvmem' in kwargs: del kwargs['hvmem'] kwargs.pop('hvmem', None)
if args.parallel is not None: if args.parallel is not None:
kwargs['pe_opt'] = "pe_mth %d" % args.parallel kwargs['pe_opt'] = "pe_mth %d" % args.parallel
if args.memory is not None: if args.memory is not None:
...@@ -172,8 +168,14 @@ def resubmit(args): ...@@ -172,8 +168,14 @@ def resubmit(args):
kwargs['memfree'] = args.memory kwargs['memfree'] = args.memory
if args.qname not in (None, 'all.q'): if args.qname not in (None, 'all.q'):
kwargs['hvmem'] = args.memory kwargs['hvmem'] = args.memory
if args.queue in ('gpu', 'lgpu', 'sgpu'): # if this is a GPU queue and args.memory is provided, we set gpumem flag
# remove 'G' last character from the args.memory string
if args.qname in ('gpu', 'lgpu', 'sgpu', 'gpum') and args.memory is not None:
kwargs['gpumem'] = args.memory kwargs['gpumem'] = args.memory
# don't set these for GPU processing or the maximum virtual memroy will be
# set on ulimit
kwargs.pop('memfree', None)
kwargs.pop('hvmem', None)
if args.parallel is not None: if args.parallel is not None:
kwargs['pe_opt'] = "pe_mth %d" % args.parallel kwargs['pe_opt'] = "pe_mth %d" % args.parallel
kwargs['memfree'] = get_memfree(args.memory, args.parallel) kwargs['memfree'] = get_memfree(args.memory, args.parallel)
......
...@@ -13,7 +13,10 @@ from .setshell import environ ...@@ -13,7 +13,10 @@ from .setshell import environ
from .models import add_job, Job from .models import add_job, Job
from .tools import logger, qsub, qstat, qdel, make_shell, makedirs_safe from .tools import logger, qsub, qstat, qdel, make_shell, makedirs_safe
import os, sys import os
import sys
import re
class JobManagerSGE(JobManager): class JobManagerSGE(JobManager):
"""The JobManager will submit and control the status of submitted jobs""" """The JobManager will submit and control the status of submitted jobs"""
...@@ -79,7 +82,7 @@ class JobManagerSGE(JobManager): ...@@ -79,7 +82,7 @@ class JobManagerSGE(JobManager):
logger.warn("This job will never be executed since the 'io_big' flag is not available for the 'all.q'.") logger.warn("This job will never be executed since the 'io_big' flag is not available for the 'all.q'.")
if 'pe_opt' in kwargs and ('queue' not in kwargs or kwargs['queue'] not in ('q1dm', 'q_1day_mth', 'q1wm', 'q_1week_mth')): if 'pe_opt' in kwargs and ('queue' not in kwargs or kwargs['queue'] not in ('q1dm', 'q_1day_mth', 'q1wm', 'q_1week_mth')):
logger.warn("This job will never be executed since the queue '%s' does not support multi-threading (pe_mth) -- use 'q1dm' or 'q1wm' instead." % kwargs['queue'] if 'queue' in kwargs else 'all.q') logger.warn("This job will never be executed since the queue '%s' does not support multi-threading (pe_mth) -- use 'q1dm' or 'q1wm' instead." % kwargs['queue'] if 'queue' in kwargs else 'all.q')
if 'gpumem' in kwargs and 'queue' in kwargs and kwargs['queue'] in ('gpu', 'lgpu', 'sgpu') and int(kwargs['gpumem']) > 24: if 'gpumem' in kwargs and 'queue' in kwargs and kwargs['queue'] in ('gpu', 'lgpu', 'sgpu') and int(re.sub("\D", "", kwargs['gpumem'])) > 24:
logger.warn("This job will never be executed since the GPU queue '%s' cannot have more than 24GB of memory." % kwargs['queue']) logger.warn("This job will never be executed since the GPU queue '%s' cannot have more than 24GB of memory." % kwargs['queue'])
assert job.id == grid_id assert job.id == grid_id
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment