# TM5 run tools
#

# ***

def Command_Line( rcf, exe, args, in_debugger ) :

    """
    Return command line that runs executable.

    ARGUMENTS
        rcf
                Rcfile with settings.
        exe
                Name of executable.
        args
                Arguments to be passed to executable.
        in_debugger
                Set to 'True' if the job should be run in a debugger.

    RETURN VALUES
        cmndline
                Command line to be executed.

    RAISES
        ValueError
                If an unsupported debugger is configured.
    """

    # external
    import socket
    import logging

    # mpi run ?
    if rcf.get('par.mpi','bool') :

        # number of mpi tasks:
        ntask = rcf.get('par.ntask','int')

        # get command line:
        cmnd_exec = rcf.get('mpirun.command')
        cmnd_args = rcf.get('mpirun.args' )

        # write command file ?
        cmdfile = rcf.get('mpirun.cmdfile',default='')
        if len(cmdfile) > 0 :
            # write the same command line for each task; the context
            # manager guarantees the file is closed even if a write fails:
            with open(cmdfile,'w') as f :
                for i in range(ntask) :
                    f.write( '%s %s\n' % (exe,args) )
                #endfor
            #endwith
        else :
            # otherwise, add the executable and its arguments:
            cmnd_args = '%s %s %s' % (cmnd_args,exe,args)
        #endif

        # write host file ?  PLS: This is done too early, and should be done
        # inside the *_run.jb script : that will let you specify nodes
        # scattered on different hosts (eg, linux cluster), and also different
        # from the current host (which probably is a login node)!! See
        # WriteJob below for cases of Loadleveler and Slurm.
        #
        # Leave it here for other cases not handled in WriteJob yet.
        hostfile = rcf.get('mpirun.hostfile',default='')
        if len(hostfile) > 0 :
            # get hostname:
            hname = socket.gethostname()
            # write hostname for each task:
            with open(hostfile,'w') as f :
                for i in range(ntask) :
                    f.write( '%s\n' % hname )
                #endfor
            #endwith
        #endif

    else :

        # standard run:
        cmnd_exec = exe
        cmnd_args = args

    #endif

    # run in debugger ?
    if in_debugger :

        # debugger type:
        debugger = rcf.get( 'debugger' )
        # get debugger command:
        debugger_call = rcf.get( 'debugger.command' )
        # large differences ...
        if debugger == 'totalview' :
            # syntaxis:  totalview <exec> [-a <args>]
            # pass executable:
            cmndline = '%s %s' % (debugger_call,cmnd_exec)
            # add arguments ?
            if len(cmnd_args) > 0 :
                cmndline = '%s -a %s' % (cmndline,cmnd_args)
            #endif
        elif debugger == 'idb' :
            # syntaxis:  idb -args <exec> <args>
            # fill executable and arguments:
            cmndline = '%s -args %s %s' % (debugger_call,cmnd_exec,cmnd_args)
        else :
            logging.error('unsupported debugger : %s' % debugger )
            # raise with an explicit message instead of a bare Exception;
            # ValueError is still caught by callers expecting Exception:
            raise ValueError( 'unsupported debugger : %s' % debugger )
        #endif

    else :

        # standard line:
        cmndline = '%s %s' % (cmnd_exec,cmnd_args)

    #endif

    # ok
    return cmndline
# ***

def WriteAndSubmitNewJob( rcfile, bindir ) :

    """
    Write first or next rcfile and job files(s) in the job chain;
    if chain is not finished yet, submit a new job.

    ARGUMENTS
        rcfile
                Name of an rcfile, or an rcfile object itself, since the
                submit script might have changed some values given the
                provided command line arguments.
        bindir
                Directory with python modules; prepended to the module
                search path of the written job.

    The following function is used:

        submit_tm5_setup_rcfile.WriteRcfile         # writes the new rcfile

    This is placed in a seperate file since users might need to
    change these routines for their specific projects.
    """

    # external:
    import logging

    # tools (project modules, resolved at run time):
    import rc

    # import setup module:
    import submit_tm5_setup_rcfile

    # name provided ? otherwise assume an rcfile object was passed:
    if isinstance(rcfile,str) :
        # load:
        rcf = rc.RcFile( rcfile )
    else :
        # just copy ..
        rcf = rcfile
    #endif

    # write next rcfile, return name; log and re-raise on error so the
    # original exception and traceback are preserved for the caller:
    try :
        rcfile_next = submit_tm5_setup_rcfile.WriteRcfile( rcf )
    except Exception :
        logging.exception( 'exception from WriteRcfile' )
        raise
    #endtry

    # finished ?
    if rcfile_next is None :
        logging.info( '  end of job chain !' )
    else :
        # write job file(s) for this period and return the (first) name;
        # last command in a file should submit the next job if necessary:
        logging.info( '  write jobfile for %s ...' % rcfile_next )
        try :
            jobfile_next = WriteJob( rcfile_next, bindir )
        except Exception :
            logging.exception( 'exception from WriteJob' )
            raise
        #endtry
        # submit the job just written:
        logging.info( '  submit next job : %s' % jobfile_next )
        try :
            jobid = SubmitJob( jobfile_next, rcfile_next )
        except Exception :
            logging.exception( 'exception from SubmitJob' )
            raise
        #endtry
    #endif

    # ok
    return
% rcfile_next ) try : jobfile_next = WriteJob( rcfile_next, bindir ) except : logging.error( sys.exc_info()[1] ) logging.error( 'exception from WriteJob' ) raise Exception logging.info( ' submit next job : %s' % jobfile_next ) try : jobid = SubmitJob( jobfile_next, rcfile_next ) except : logging.error( sys.exc_info()[1] ) logging.error( 'exception from SubmitJob' ) raise Exception return # *** def WriteJob( rcfile, bindir ) : """ jobfile = WriteJob(rcfile) Write job file given the settings in rcfile. The name of the jobfile is based on the name of the rcfile. The last command in the job should submit the next job, and the script is therefore written in python. """ # external: import os import rc # load settings: rcf = rc.RcFile( rcfile ) # basename for scripts etc is name of rcfile minus extension: bname,ext = os.path.splitext(rcfile) # loadleveler supports master job with multiple steps: with_master_job = (rcf.get('submit.to') == 'queue') and (rcf.get('queue') == 'loadleveler') # which shell ? job_shell = '/usr/bin/env python' # start master job ? if with_master_job : ntasks = rcf.get('par.ntask') # name of jobfile: jobfile = '%s.jb' % bname # set header: header = [] header.append( '#! 
%s\n' % job_shell ) header.append( '\n' ) # init queue options: qopt = QueueOptions( bname, rcf, 'default' ) # init job file: job = [] job.append( '# external:\n' ) job.append( 'import os\n' ) job.append( 'import sys\n' ) job.append( 'import socket\n') job.append( 'import subprocess\n' ) job.append( 'import logging\n' ) job.append( '\n' ) job.append( '# setup messages:\n' ) job.append( "logging.basicConfig( format='%(lineno)-4s:%(filename)-30s [%(levelname)-8s] %(message)s', level=logging.INFO, stream=sys.stdout )\n" ) job.append( '\n' ) job.append( '# prepend locations of python modules to search path:\n' ) job.append( "sys.path.insert( 0, '%s' )\n" % bindir ) job.append( '\n' ) job.append( '# tools:\n' ) job.append( 'import submit_tm5_tools\n' ) job.append( '\n' ) job.append( '# current loadleveler steps:\n' ) job.append( 'step_name = os.getenv("LOADL_STEP_NAME")\n' ) job.append( '\n' ) # HOSTFILE - Moved here from Command_Line for loadleveler. hostfile = rcf.get('mpirun.hostfile',default='') # calling script will create hostfile if (len(hostfile) > 0) and rcf.get('par.mpi','bool') : job.append( 'f = open("%s",\'w\') \n' % hostfile ) job.append( 'hname = socket.gethostname() \n') job.append( "for i in range( %s ) : \n" % ntasks ) job.append( "\tf.write( '%s\\n' % hname)\n" ) job.append( 'f.close() \n') job.append( '\n' ) # job step names: steps = rcf.get('job.steps').split(' ') # number of steps: nstep = len(steps) # loop over job steps: for istep in range(nstep) : # current: step = steps[istep] # next: if istep < nstep-1 : step_next = steps[istep+1] # list with queue option lines for this step: qopt_step = QueueOptions( bname, rcf, step ) # call to actual script: if step == 'run' : # get command line: exe = os.path.join( os.curdir, rcf.get('job.step.%s.exe' % step) ) args = rcfile indb = rcf.get('submit.debugger','bool') cmndline = Command_Line( rcf, exe, args, indb ) #