123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329 |
- #! /usr/bin/env python
- import rc
- import os
- import sys
- import subprocess
- import glob
- from optparse import OptionParser
- from datetime import timedelta
- import time
- import re
- import pdb
- """
- Module with a class dedicated to TM5 rcfiles
- rctm5 : derived RcFile class, with specifics for TM5.
- In a module you can do:
- import tm5
-
- otm = tm5.rctm5('chem-test-cbm4.rc')
- otm.display()
- otm.run(queue=True)
- if otm.basic_checkrun():
- elapsed = otm.get_runtime(verbose=True)
- otm.cleanup()
- Note that it can be initialized with a 'raw' rcfile, which lets you modify keys
- before expansion:
- otm = tm5.rctm5('chem-test-cbm4.rc', raw=True)
- otm.replace( 'my.project.dir',' ${my.scratch}/another-proj-dir' )
- otm.WriteFile('my-new-rcfile') # needed for run to account for the change!
- ntm = tm5.rctm5('my-new-rcfile')
- ntm.run()
-
- """
class rctm5(rc.RcFile):
    """
    Derived RcFile class with specifics for TM5 runs.

    Adds methods:
        'display'         : print basic info about the object
        'run'             : call ./setup_tm5 for this rcfile
        'get_runtime'     : wall time of each leg, parsed from the run logs
        'basic_checkrun'  : test for the tm5.ok file (and final restart)
        'cleanup'         : remove restart, setup log and run dir files
        'get_output_list' : list the files found in the run/output/profile dirs
        'timers_dict'     : root timers of the first leg

    Adds the following attributes (derived or existing in the rc dico but
    used a lot):
        self.rundone   : T/F
        self.status    : 'unknown', 'running', 'crashed' or 'done'
        self.runtype   : value of 'submit.to', 'queue' once run(queue=True)
                         was called, or 'undefined' for a raw rcfile
        self.region1   : name of first region (eg: 'glb600x400')
        self.restart   : restart file
        self.timestamp : 'startdate_enddate' string used in mmix, budget,...
                         filenames
        self.logcomp   : name of log of setup_tm5 script
        self.rundir    : run directory
        self.ok        : tm5.ok fully qualified filename
        self.runtime   : dictionary filled by get_runtime()

    Also expands path of filename at init.
    """

    # Wall-time line looked for in the run logs. The lazy '.*?' matters: a
    # greedy '.*' swallows all but the last digit of the hours field.
    _WALLTIME_RE = re.compile(
        r'submit_tm5_step_run - wall time after run.*?'
        r'(\d+):(\d+):(\d+) \(hh:mm:ss\)')

    def __init__(self, filename, raw=False):
        """
        Read rcfile 'filename' ('~' is expanded). With raw=True the file is
        passed to RcFile with raw=True and 'submit.to' is not looked up.
        """
        rc.RcFile.__init__(self, os.path.expanduser(filename), raw=raw)
        self.setfilenames()
        self.rundone = False
        self.status = 'unknown'
        if raw:
            self.runtype = 'undefined'
        else:
            self.runtype = self.get('submit.to')
        # sets self.rundone to True if tm5.ok and the final restart exist
        self.basic_checkrun(verbose=False)
        self.runtime = {}

    @staticmethod
    def _split_time(stamp):
        """
        Split a 'YYYY-MM-DD hh:mm:ss' string into ('YYYYMMDD', 'hh', 'hhmm').
        Raises IndexError if the time-of-day field is missing.
        """
        fields = stamp.split()
        clock = fields[1]
        ymd = fields[0].replace('-', '')
        return ymd, clock[:3].replace(':', ''), clock[:5].replace(':', '')

    @staticmethod
    def _files_in(dirname):
        """
        Return the full paths of the regular files found directly in
        'dirname' (empty list if it is not an existing directory).
        """
        if not os.path.isdir(dirname):
            return []
        paths = [os.path.join(dirname, entry) for entry in os.listdir(dirname)]
        return [path for path in paths if os.path.isfile(path)]

    def setfilenames(self):
        """
        Derive region1, restart, timestamp, rundir, ok and logcomp from the
        rc keys. If a key cannot be read or parsed, restart and ok are set
        to '' and the attributes not yet derived keep the default ''.
        """
        # defaults, so that the attributes exist whatever happens below
        self.region1 = ''
        self.timestamp = ''
        self.rundir = ''
        try:
            # final restart filename
            outrestart = self.get('restart.write.dir')
            endtime = self.get('timerange.end')
            hres = self.get('my.region1')
            ymd, hh, hhmm = self._split_time(endtime)
            fname = ''.join(["TM5_restart_", ymd, "_", hhmm, "_", hres, ".nc"])

            self.region1 = hres
            self.restart = os.path.join(outrestart, fname)

            # repeat for start time, and get YYYYMMDDHH_YYYYMMDDHH string
            # used in mmix, jstat filenames (useful for runs with ONE job only)
            symd, shh, _ = self._split_time(self.get('timerange.start'))
            self.timestamp = ''.join([symd, shh, "_", ymd, hh])

            # rundir, tm5.ok, ...
            self.rundir = self.get('my.run.dir')
            self.ok = os.path.join(self.rundir, 'tm5.ok')
        except Exception:
            # best effort: e.g. a raw rcfile may not provide usable values
            self.restart = ""
            self.ok = ''

        # Setup_tm5 script log (compilation if any)
        self.logcomp = os.path.join(
            os.curdir, os.path.splitext(self.filename)[0] + '.out')

    def display(self):
        """
        Print basic info about tm5-rc obj.
        """
        print("RC file : %s" % self.filename)
        print("run dir : %s" % self.get('my.run.dir'))
        print("end restart : %s" % self.restart)
        print(" ...exists : %s" % os.path.isfile(self.restart))
        print("status : %s" % self.status)
        print("fg/bg/queue : %s" % self.runtype)
        print("log compil. : %s" % self.logcomp)
        print("--------------------------")

    def run(self, force=False, clean=False, queue=True):
        """
        Call setup_tm5, if final restart does not exist.
        If 'force', then runs even if the restart exists.
        If 'clean', re-compile everything ('build' dir is removed).
        If 'queue', use the queue manager, else run with the setting in the
        rc file.

        The output of setup_tm5 is written to self.logcomp.
        Raises RuntimeError if setup_tm5 returns a non-zero exit code.
        """
        if queue:
            self.runtype = "queue"

        if not force and os.path.isfile(self.restart):
            print('skipping run for ' + self.filename +
                  ' (final restart already exists)')
            self.status = 'done'
            self.rundone = True
            return

        command = [os.path.join(os.curdir, 'setup_tm5'), self.filename, '-s']
        if queue:
            command.append("-q")
        if clean:
            command.append("-n")

        print("submitting run for " + self.filename)
        # 'with' closes the log even if the subprocess cannot be started
        with open(self.logcomp, 'w') as logfile:
            retcode = subprocess.call(command, stdout=logfile,
                                      stderr=subprocess.STDOUT)

        if retcode != 0:
            print("compilation failed. See:  %s" % self.logcomp)
            self.status = 'crashed'
            self.rundone = True
            raise RuntimeError("setup_tm5 failed for %s (exit code %s), see %s"
                               % (self.filename, retcode, self.logcomp))

        print("submit ok")
        self.status = 'running'
        self.rundone = False

    def get_runtime(self, verbose=False, total=False):
        """
        Set and Get (return) runtime of all available legs into a dictionary of
             key,val =  log filename [string], runtime [timedelta]
        for each leg. The logs are the '<my.basename>_NNN_run.out' files of
        'my.run.dir'; the dictionary is also stored in self.runtime.

        If total is True, returns instead the total runtime as a timedelta
        (zero if no runtime was found).
        """
        logmask = self.get('my.basename') + '_[0-9][0-9][0-9]_run.out'
        logs = sorted(glob.glob(os.path.join(self.get('my.run.dir'), logmask)))

        stat = {}
        for fname in logs:
            with open(fname, 'r') as f:
                for line in f:
                    found = self._WALLTIME_RE.match(line)
                    if not found:
                        continue
                    hours, minutes, seconds = [int(g) for g in found.groups()]
                    stat[fname] = timedelta(hours=hours, minutes=minutes,
                                            seconds=seconds)
                    if verbose:
                        print("%s: %s" % (os.path.basename(fname), stat[fname]))
        self.runtime = stat

        # starting from timedelta(0) keeps total_rt defined without any log
        total_rt = sum(stat.values(), timedelta(0))

        if verbose:
            if stat:
                print('total runtime: %s' % total_rt)
            else:
                print('no runtime available')

        if total:
            return total_rt
        return stat

    def basic_checkrun(self, restart=True, verbose=False):
        """
        Check if a run is successfully terminated by checking the
        existence of the tm5.ok file and optionally final restart file.
        Note this is not bullet proof: if between legs and final restart
        remains from a previous run.

        Returns True (and sets self.rundone to True) if the check passes,
        False otherwise.
        """
        finished = os.path.exists(self.ok) and (
            not restart or os.path.exists(self.restart))

        if finished:
            self.rundone = True

        if verbose:
            if finished:
                outcome = ": sucessfully terminated"
            else:
                outcome = ": not finished or crashed"
            print("basic run check for " + self.filename + " " + outcome)
            print("--------------------------")

        return finished

    def cleanup(self, full=False, verbose=False):
        """
        Remove the final restart, the setup_tm5 log and the files of the run
        directory, then reset self.rundone to False. Output and profiling
        are left in their own directory (if different from the rundir),
        unless 'full' is True: then every file returned by get_output_list()
        is removed as well.
        """
        if verbose:
            print("cleaning up :  %s" % self.filename)

        # -- Minimal
        for fname in (self.restart, self.logcomp):
            if os.path.isfile(fname):
                os.remove(fname)

        for fname in self._files_in(self.get('my.run.dir')):
            os.remove(fname)

        self.rundone = False

        # -- Full
        if full:
            for fname in self.get_output_list():
                os.remove(fname)

    def get_output_list(self):
        """
        Return the list of existing files among: final restart, setup_tm5
        log, and the regular files directly in the run dir, the output dir
        and the profiling dir ('timing.output.subdir' of the output dir).
        Each path appears once, even if some of these directories coincide.
        """
        rundir = self.get('my.run.dir')
        outdir = self.get('output.dir')
        prfdir = os.path.join(outdir, self.get('timing.output.subdir'))

        # restart and log
        found = [f for f in (self.restart, self.logcomp) if os.path.isfile(f)]
        # run, output and profile dirs
        for dirname in (rundir, outdir, prfdir):
            found.extend(self._files_in(dirname))

        # drop duplicates, keeping the order of first appearance
        seen = set()
        out = []
        for fname in found:
            if fname not in seen:
                seen.add(fname)
                out.append(fname)
        return out

    def timers_dict(self, *timers):
        """
        Returns root timers of the first leg in a dictionary of
             key,val = timer name [string], total time [float]
        read from '<my.basename>_001_0000.prf' in the profiling dir.
        If strings arguments are passed, only their entries are returned
        (names that are not found are left out).

        Prints a message and returns None if 'timing.output' is not 'T'.
        Raises IOError if the prf file cannot be opened.
        """
        if self.get('timing.output') != 'T':
            print(self.filename + " doesn't have timers for first leg")
            return None

        # prf filename for root
        rootlog = os.path.join(self.get('output.dir'),
                               self.get('timing.output.subdir'),
                               self.get('my.basename') + '_001_0000.prf')

        # the timers are listed between these two header lines
        table_begin = "# index, total time, name"
        table_end = "# for each timer, total times spent on child processes"

        in_table = False
        alltimers = {}
        with open(rootlog, 'r') as f:
            for line in f:
                if line.startswith(table_begin):
                    in_table = True
                    continue
                if line.startswith(table_end):
                    in_table = False
                if not in_table:
                    continue

                # expected fields: index, total time, name (may hold blanks)
                fields = line.split()
                if len(fields) < 2:
                    continue
                try:
                    elapsed = float(fields[1])
                except ValueError:
                    # not a timer entry (e.g. a comment inside the table)
                    continue
                alltimers[' '.join(fields[2:])] = elapsed

        if timers:
            return dict((t, alltimers[t]) for t in timers if t in alltimers)
        return alltimers
-
|