#! /usr/bin/env python import rc import os import sys import subprocess import glob from optparse import OptionParser from datetime import timedelta import time import re import pdb """ Module with a class dedicated to TM5 rcfiles rctm5 : derived RcFile class, with specifics for TM5. In a module you can do: import tm5 otm = tm5.rctm5('chem-test-cbm4.rc') otm.display() otm.run(queue=True) if otm.basic_checkrun(): elapsed = otm.get_runtime(verbose=True) otm.cleanup() Note that can be initialized with a 'raw' rcfile, which let you modify keys before expansion: otm = tm5.rctm5('chem-test-cbm4.rc', raw=True) otm.replace( 'my.project.dir',' ${my.scratch}/another-proj-dir' ) otm.WriteFile('my-new-rcfile') # needed for run to account for the change! ntm = tm5.rctm5('my-new-rcfile') ntm.run() """ class rctm5(rc.RcFile): """ Derived RcFile class, which: Adds methods: 'run' 'basic_checkrun' 'clean' Adds the following attributes (derived or existing in the rc dico but used a lot): self.rundone : T/F self.status : '', 'crashed', 'running' self.runtype : 'foreground', 'queue' self.region1 : name of first region (eg: 'glb600x400') self.restart : restart file self.timestamp : 'startdate_enddate' string used in mmix, budget,... filenames self.logcomp : name of log of setup_tm5 script self.rundir : run directory self.ok : tm5.ok fully qualified filename Also expands path of filename at init. """ def __init__(self, filename, raw=False): rc.RcFile.__init__(self,os.path.expanduser(filename), raw=raw) self.setfilenames() self.rundone = False self.status = 'unknown' if raw: self.runtype = 'undefined' else: self.runtype = self.get('submit.to') self.basic_checkrun(verbose=False) self.runtime = {} def setfilenames(self): try: # restart filenames (original in netCDF 4 and converted to nc-3) outrestart = self.get('restart.write.dir' ) etime = self.get('timerange.end' ) hres = self.get('my.region1' ) etime = etime.split() ymd = ''.join(etime[0].split('-')) hhmm = ''.join(etime[1][:5].split(':')) hh = ''.join(etime[1][:3].split(':')) fname = ''.join(["TM5_restart_", ymd, "_", hhmm, "_", hres, ".nc"]) self.region1 = hres self.restart = os.path.join(outrestart, fname) # repeat for start time, and get YYYYMMDDHH_YYYYMMDDHH string # used in mmix, jstat filenames (useful for runs with ONE job only) stime = self.get('timerange.start' ) stime = stime.split() symd = ''.join(stime[0].split('-')) shhmm = ''.join(stime[1][:5].split(':')) shh = ''.join(etime[1][:3].split(':')) self.timestamp = ''.join([symd, shh, "_", ymd, hh]) # rundir, tm5.ok, ... self.rundir = self.get('my.run.dir') self.ok = os.path.join(self.rundir,'tm5.ok') except: self.restart = "" self.ok='' # Setup_tm5 script log (compilation if any) self.logcomp = os.path.join(os.curdir, os.path.splitext(self.filename)[0]+'.out') def display(self): """ print basic info about tm5-rc obj. """ print "RC file :", self.filename print "run dir :", self.get('my.run.dir') print "end restart :", self.restart print " ...exists :", os.path.isfile(self.restart) print "status :", self.status print "fg/bg/queue :", self.runtype print "log compil. :", self.logcomp print "--------------------------" def run(self, force=False, clean=False, queue=True): """ Call setup_tm5, if final restart does not exist. If 'force', then runs even if the restart exists. If 'clean', re-compile everything ('build' dir is removed). If 'queue', use the queue manager, else run with the setting in the rc file. """ if queue : self.runtype = "queue" tosubmit = force or (not os.path.isfile(self.restart)) if tosubmit: command = [os.path.join(os.curdir,'setup_tm5'), self.filename,'-s'] if queue: command.append("-q") if clean : command.append("-n") print "submitting run for "+ self.filename fo=open(self.logcomp,'w') retcode = subprocess.call( command, stdout=fo, stderr=subprocess.STDOUT) fo.close() if retcode != 0 : print "compilation failed. See: ", self.logcomp self.status = 'crashed' self.rundone = True raise Exception else: print "submit ok" self.status = 'running' self.rundone = False else: print 'skipping run for '+ self.filename+' (final restart already exists)' self.status = 'done' self.rundone = True def get_runtime(self, verbose=False, total=False): """ Set and Get (return) runtime of all available legs into a dictionary of key,val = log filename [string], runtime [deltatime] for each leg. If total is True, simply returns total runtime in seconds. """ exp = re.compile('submit_tm5_step_run - wall time after run.*(\d+):(\d+):(\d+) \(hh:mm:ss\)') rundir = self.get('my.run.dir') runid = self.get('my.basename') mask = ''.join([runid,'_[0-9][0-9][0-9]_run.out']) logs = glob.glob(os.path.join(rundir,mask)) stat={} for fname in logs: with open(fname, 'r') as f: for line in f: match = exp.match(line) if match: stat[fname] = timedelta( hours = int(match.group(1)), minutes = int(match.group(2)), seconds = int(match.group(3)) ) if verbose: print ''.join([os.path.basename(fname),':']), stat[fname] self.runtime = stat if stat : total_rt = timedelta(seconds=sum(dt.total_seconds() for dt in stat.values())) if verbose: if stat: print 'total runtime:', total_rt else: print 'no runtime available' if total: return total_rt else: return stat def basic_checkrun(self, restart=True, verbose=False): """ Check if a run is successfully terminated by checking the existence of the tm5.ok file and optionally final restart file. Note this is not bullet proof: if between legs and final restart remains from a previous run. """ if verbose: print "basic run check for "+self.filename, if os.path.exists(self.ok) and not (restart and not os.path.exists(self.restart)): if verbose: print ": sucessfully terminated" if verbose: print "--------------------------" self.rundone = True return True else: if verbose: print ": not finished or crashed" if verbose: print "--------------------------" return False def cleanup(self, full=False, verbose=False): if verbose: print "cleaning up : ",self.filename # -- Minimal if os.path.isfile(self.restart) : os.remove(self.restart) if os.path.isfile(self.logcomp) : os.remove(self.logcomp) # Empty rundir. Output and profiling are left in their # own directory (if different from the rundir) rundir = self.get('my.run.dir') if os.path.exists(rundir): files = [ os.path.join(rundir, f) for f in os.listdir(rundir) if os.path.isfile(os.path.join(rundir, f))] for f in files: os.remove(f) self.rundone = False # -- Full if full: for f in get_output_list(): os.remove(f) def get_output_list(self): # restart and log out=[self.restart, self.logcomp] out=[f for f in out if os.path.isfile(f)] # run dir rundir = self.get('my.run.dir') if os.path.exists(rundir): files = [ os.path.join(rundir, f) for f in os.listdir(rundir) if os.path.isfile(os.path.join(rundir, f))] out=out+files # output dir rundir = self.get('output.dir') if os.path.exists(rundir): files = [ os.path.join(rundir, f) for f in os.listdir(rundir) if os.path.isfile(os.path.join(rundir, f))] out=out+files # profile dir subdir = self.get('timing.output.subdir') rundir = os.path.join(rundir, subdir) if os.path.exists(rundir): files = [ os.path.join(rundir, f) for f in os.listdir(rundir) if os.path.isfile(os.path.join(rundir, f))] out=out+files return out def timers_dict(self, *timers): ''' Returns root timers of the first leg in a dictionary, if available. If strings arguments are passed, only their entries are returned. ''' if self.get('timing.output') == 'T': # prf filename for root rundir = self.get('output.dir') subdir = self.get('timing.output.subdir') runid = self.get('my.basename') mask = ''.join([runid,'_001_0000.prf']) rootlog = os.path.join(rundir,subdir,mask) # scan prf files sep = "# index, total time, name" regB = re.compile(sep) sep = "# for each timer, total times spent on child processes" regE = re.compile(sep) isTimerLine = False alltimers={} with open(rootlog, 'r') as f: for line in f: begin = regB.match(line) end = regE.match(line) if begin: isTimerLine = True continue if end: isTimerLine = False if isTimerLine: splitline = line.split() alltimers[' '.join(splitline[2:])] = float(splitline[1]) if timers: return dict((t, alltimers[t]) for t in timers if t in alltimers) else: return alltimers else: print self.filename + " doesn't have timers for first leg"