tm5_test.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844
  1. #! /usr/bin/env python
  2. import rc
  3. import os
  4. import sys
  5. import subprocess
  6. import glob
  7. from optparse import OptionParser
  8. import time
  9. import re
  10. import ttb_compare # For METHOD #2 in comparing restart. Comment for method #1
  11. import pdb
  12. """
  13. Module with THREE classes and ONE function.
  14. The 3 classes are:
  15. rctm5 : derived RcFile class, with specifics for TM5. Requires the
  16. tools/ttb/bin/rc.py for its raw option.
  17. tm5_test : class associated to a list of rc files.
  18. Has method to compare output restart from two runs associated
  19. to two of the rc files.
  20. LL : set of LoadLeveler specific functions (tested with the ECMWF
  21. implementation only). **OBSOLETE**
  22. --------------------------------------------------------------------------------
  23. The function 'testtm' (which is run if module is called as a script) is an
  24. example on how to use the classes defined here. It takes one or two input(s),
  25. which must be TM5 rc filenames:
  26. if two : run both cases and compare final restarts
  27. if one : run three cases (no mpi, 4, and 10 tasks), by modifying
  28. par.ntask, and compare their final restarts
  29. OR
  30. run two cases (w/ and w/o intermediate restart), by modifying
  31. jobstep.length, and compare their final restarts
  32. --------------------------------------------------------------------------------
  33. In a module you would do:
  34. import tm5_test
  35. tm5_test.testtm( ['file1.rc'], mpi=True, tm6=True )
  36. tm5_test.testtm( ['file1.rc', 'file2.rc'], new=True )
  37. At the command line, the same calls would be:
  38. $> tm5_test.py -m6 file1.rc
  39. $> tm5_test.py -n file1.rc file2.rc
  40. See:
  41. $> tm5_test.py -h
  42. """
  43. #////////////////////////////////////////////////////////
  44. # LoadLeveler specific functions (check run, get timing..)
  45. #////////////////////////////////////////////////////////
  46. class LL(object):
  47. """
  48. FIXME: should inherited rctm5 object => then no need for rootname at
  49. initialization
  50. """
  51. def __init__(self, rootname):
  52. self.root = rootname
  53. self.err=(''.join([self.root,'_init.err']),
  54. ''.join([self.root,'_run.err'] ),
  55. ''.join([self.root,'_done.err']) )
  56. # basic check on existence of the run (wait a bit)
  57. if not os.path.isfile(self.err[0]):
  58. time.sleep(10)
  59. if not os.path.isfile(self.err[0]):
  60. print "Run not started..."
  61. raise Exception
  62. def clean_jobs(self):
  63. """
  64. simply remove the log files
  65. """
  66. def check_jobs(self):
  67. """
  68. check completion of each 3 steps of a run
  69. """
  70. # INIT
  71. while self.isRunning(0): time.sleep(10)
  72. if not self.step_success(0):
  73. print self.root, " : FAIL at init step"
  74. raise Exception
  75. # RUN
  76. while self.isRunning(1): time.sleep(10)
  77. if not self.step_success(1):
  78. print self.root, " : FAIL at run step"
  79. raise Exception
  80. # DONE
  81. while self.isRunning(2): time.sleep(10)
  82. if not self.step_success(2):
  83. print self.root, " : FAIL at done step"
  84. raise Exception
  85. def step_success(self, step=0, verbose=False):
  86. """
  87. Look for 'exit_code=X' (if any) in *err file from a
  88. loadleveler job step, and return True if success, False
  89. else
  90. """
  91. ff = glob.glob(self.err[step])
  92. status = False
  93. if not len(ff):
  94. if verbose : print '\t',"No file :" + self.err[step]
  95. else:
  96. ffo=open(ff[0],'r')
  97. found=False
  98. for line in ffo:
  99. match=re.search('.*exit_code=(\d+)',line)
  100. if match:
  101. found=True
  102. if match.group(1) != '0':
  103. if verbose:print '\t', 'Error = '+match.group(1)
  104. else:
  105. status=True
  106. if verbose:print ' Succes = '+match.group(1)
  107. if not found: # catches memory out-of-range submit
  108. if verbose:print ' Error = unknown'
  109. ffo.close()
  110. return status
  111. def step_get_timing(self, step=0, verbose=False):
  112. """
  113. return timing as a tuple:
  114. ( elapsed time [s], cpu time [s], cpu time [hh:mm:ss],
  115. cost [system billing units] )
  116. from a finished jobstep from Loadleveler. If the *err file
  117. associated to the step is not found, just return 0,0,0,0
  118. """
  119. ff = glob.glob(self.err[step])
  120. regex=(
  121. '^ *Elapsed: *(\d+\.\d+) *sec',
  122. '^ *CPU Tot: *(\d+\.\d+) *sec',
  123. '.*\\+(\d{2}:\d{2}:\d{2})',
  124. '^ *System Billing Units .*= *(\d+\.\d+)'
  125. )
  126. elapsed, cpu, cpuf, bill=0,0,0,0
  127. if not len(ff):
  128. if verbose : print " No file :" + self.err[step]
  129. else:
  130. ffo=open(ff[0],'r')
  131. found=False
  132. for line in ffo:
  133. for k,r in enumerate(regex):
  134. match=re.search(r,line)
  135. if match:
  136. if k == 0: elapsed=match.group(1)
  137. if k == 1: cpu=match.group(1)
  138. if k == 2: cpuf=match.group(1)
  139. if k == 3: bill=match.group(1)
  140. ffo.close()
  141. return elapsed, cpu, cpuf, bill
  142. def isRunning(self, step=0):
  143. """
  144. returns True if step is running, False else
  145. """
  146. ff = glob.glob(self.err[step])
  147. if not len(ff):
  148. print "File not found:", self.err[step]
  149. # get job id
  150. match=None
  151. while not match:
  152. ffo=open(ff[0],'r')
  153. regex = ''.join(['c2a.*\.\d+\.', str(step)])
  154. for line in ffo:
  155. match = re.search(regex, line)
  156. if match:
  157. idr=match.group(0)
  158. break
  159. ffo.close()
  160. if not match: time.sleep(5) # if file opened too early
  161. # get status
  162. command = ['llq', '-f', '%st', idr]
  163. pr = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  164. out, err = pr.communicate()
  165. check=re.match('ST\n--\n(..)\n', out)
  166. if check :
  167. status=check.group(1)
  168. #print " status of job "+idr+" : "+status
  169. # if Idle, Running, Pending, almost completed, not queued yet, or Starting
  170. if status in ['R ','P ','I ', 'CP','ST', 'NQ']:
  171. return True
  172. else:
  173. return False
  174. else:
  175. return False
  176. #////////////////////////////////////////////////////////
  177. # Derived RcFile class for TM5
  178. #////////////////////////////////////////////////////////
  179. class rctm5(rc.RcFile):
  180. """
  181. Derived RcFile class that adds 'run', 'checkrun', 'clean' and a
  182. 'convert2nc3' methods.
  183. Add the following attributes (derived or existing in the rc dico but used a lot):
  184. self.rundone : T/F
  185. self.status : '', 'crashed', 'running'
  186. self.runtype : 'foreground', 'queue'
  187. self.region1 : name of first region (eg: 'glb600x400')
  188. self.restart : restart file in netCDF-4 format
  189. self.restart3 : restart file in netCDF-3 format
  190. self.timestamp : 'startdate_enddate' string used in mmix, budget,... filenames
  191. self.logcomp : name of log of setup_tm5 script
  192. self.rundir : run directory
  193. self.ok : tm5.ok fully qualified filename
  194. Also expands path of filename at init.
  195. """
  196. def __init__(self, filename, raw=False):
  197. rc.RcFile.__init__(self,os.path.expanduser(filename), raw=raw)
  198. self.setfilenames()
  199. self.rundone = False
  200. self.status = 'unknown'
  201. self.runtype = self.get('submit.to')
  202. self.basic_checkrun(verbose=False)
  203. def setfilenames(self):
  204. try:
  205. # restart filenames (original in netCDF 4 and converted to nc-3)
  206. outrestart = self.get('restart.write.dir' )
  207. etime = self.get('timerange.end' )
  208. hres = self.get('my.region1' )
  209. etime = etime.split()
  210. ymd = ''.join(etime[0].split('-'))
  211. hhmm = ''.join(etime[1][:5].split(':'))
  212. hh = ''.join(etime[1][:3].split(':'))
  213. fname = ''.join(["TM5_restart_", ymd, "_", hhmm, "_", hres, ".nc"])
  214. self.region1 = hres
  215. self.restart = os.path.join(outrestart, fname)
  216. self.restart3 = os.path.join(outrestart, ''.join([fname,"3"]))
  217. # repeat for start time, and get YYYYMMDDHH_YYYYMMDDHH string
  218. # used in mmix, jstat filenames (useful for runs with ONE job only)
  219. stime = self.get('timerange.start' )
  220. stime = stime.split()
  221. symd = ''.join(stime[0].split('-'))
  222. shhmm = ''.join(stime[1][:5].split(':'))
  223. shh = ''.join(etime[1][:3].split(':'))
  224. self.timestamp = ''.join([symd, shh, "_", ymd, hh])
  225. # rundir, tm5.ok, ...
  226. self.rundir = self.get('my.run.dir')
  227. self.ok = os.path.join(self.rundir,'tm5.ok')
  228. except:
  229. self.restart = ""
  230. self.restart3 = ""
  231. self.ok=''
  232. # Setup_tm5 script log (compilation if any)
  233. self.logcomp = os.path.join(os.curdir,
  234. os.path.splitext(self.filename)[0]+'.out')
  235. def display(self):
  236. """
  237. print basic info about tm5-rc obj.
  238. """
  239. print "RC file :", self.filename
  240. print "run dir :", self.get('my.run.dir')
  241. print "end restart :", self.restart
  242. print " ...exists :", os.path.isfile(self.restart)
  243. print "status :", self.status
  244. print "fg/bg/queue :", self.runtype
  245. print "log compil. :", self.logcomp
  246. print "--------------------------"
  247. def run(self, force=False, clean=False, queue=True):
  248. """
  249. Call setup_tm5, if final restart does not exist.
  250. If 'force', then runs even of the restart exists.
  251. If 'clean', re-compile everything ('build' dir is removed).
  252. If 'queue', use the queue manager, else run with the setting in the rc file.
  253. """
  254. if queue : self.runtype = "queue"
  255. tosubmit = force or (not os.path.isfile(self.restart))
  256. if tosubmit:
  257. command = [os.path.join(os.curdir,'setup_tm5'),
  258. self.filename,'-s']
  259. if queue :
  260. command.append("-q")
  261. #OLD # ------ loadleveler @ ECMWF specific (begin) ------
  262. #OLD # Remove any old log files (so .check_job will work)
  263. #OLD runid = self.get('my.basename')
  264. #OLD rundir = self.get('my.run.dir')
  265. #OLD mask = os.path.join(rundir,''.join([runid,'_[0-9][0-9][0-9]_*.err']))
  266. #OLD
  267. #OLD for f in glob.glob(mask):
  268. #OLD #print " removing "+f
  269. #OLD os.remove(f)
  270. #OLD
  271. #OLD # ------ loadleveler @ ECMWF specific (end) ------
  272. if clean : command.append("-n")
  273. print "submitting run for "+ self.filename
  274. fo=open(self.logcomp,'w')
  275. retcode = subprocess.call( command, stdout=fo, stderr=subprocess.STDOUT)
  276. fo.close()
  277. if retcode != 0 :
  278. print "compilation failed. See: ", self.logcomp
  279. self.status = 'crashed'
  280. self.rundone = True
  281. raise Exception
  282. else:
  283. print "submit ok"
  284. self.status = 'running'
  285. self.rundone = False
  286. else:
  287. print 'skipping run for '+ self.filename+' (final restart already exists)'
  288. self.status = 'done'
  289. self.rundone = True
  290. def convert2nc3(self):
  291. """
  292. Goal : modifying restart files so we can use 'cmp' on them.
  293. If we were dealing with netCDF-3, taking off the TimeStamp
  294. attribute from netCDF files is usually enough. But TM5 writes
  295. restart files in netCDF-4
  296. """
  297. # IF netcdf 3 then
  298. # command = ['ncatted', '-a TimeStamp,global,d,c,"a"', '-h', self.restart]
  299. # IF netcdf 4, convert to netcdf 3, without adding to history
  300. # attribute [could use nccopy if available]
  301. command = ['ncks', '-3', '-h', self.restart, self.restart3]
  302. if os.path.exists(self.restart) == True:
  303. if os.path.exists(self.restart3) == False:
  304. retcode = subprocess.call( command )
  305. if retcode != 0 :
  306. print command
  307. print "Conversion to netCDF 3 failed"
  308. raise Exception
  309. else:
  310. print "Restart converted to netCDF-3"
  311. else:
  312. print "Restart already converted to netCDF-3"
  313. else:
  314. print "No restart file to touch"
  315. def get_runtime(self):
  316. """
  317. simple print/return some runtimes for foreground run
  318. """
  319. regex = (
  320. '00] root *(\d+\.\d+)',
  321. '00] init *(\d+\.\d+)',
  322. '00] step init *(\d+\.\d+)',
  323. '00] step run *(\d+\.\d+)',
  324. '00] tmm readfield 2D *(\d+\.\d+)',
  325. '00] tmm readfield 3D *(\d+\.\d+)',
  326. '00] other *(\d+\.\d+)'
  327. )
  328. zeros=[0]*len(regex)
  329. if not self.rundone :
  330. print "run not submitted: No timing available."
  331. return zeros
  332. if self.runtype == 'foreground':
  333. try:
  334. logfile=open(self.logcomp,'r')
  335. except:
  336. print "no log file: No timing available."
  337. return zeros
  338. stat=[]
  339. for line in logfile:
  340. for k,r in enumerate(regex):
  341. klm=-1
  342. match = re.search(r, line)
  343. if match:
  344. print line,
  345. stat.append(match.group(1))
  346. klm=k
  347. if (klm+1) == len(regex) : break
  348. logfile.close()
  349. return stat
  350. else:
  351. # LoadLeveler case
  352. print "timing not directly available for LoadLeveler. See \
  353. checkrun for how-to"
  354. return zeros
  355. def checkrun(self, nowait=False):
  356. """
  357. Should return once a run is completely done. Already accounts for
  358. multiple jobsteps, and catches most crashes (as long as they give
  359. an error in log file). Now should be platform independent and works
  360. if run is in the foreground... where it is a bit overkill.
  361. """
  362. print "checking run for "+self.filename
  363. #if self.rundone :
  364. # print " restart already exists"
  365. # return
  366. rundir = self.get('my.run.dir')
  367. runid = self.get('my.basename')
  368. #--------------------
  369. # LOOP thru jobsteps
  370. #--------------------
  371. islast = False
  372. rcs=[]
  373. rc_exclude=[]
  374. while not islast:
  375. # get a newer rc file
  376. while not rcs :
  377. time.sleep(1)
  378. mask = ''.join([runid,'_[0-9][0-9][0-9].rc'])
  379. rcs = glob.glob(os.path.join(rundir,mask))
  380. if rc_exclude :
  381. for r in rc_exclude:
  382. if r in rcs: rcs.remove(r)
  383. rc_exclude.extend(rcs)
  384. # in case of older files around in the first pass
  385. newest = max(rcs, key=lambda x: os.stat(x).st_mtime)
  386. orc = rc.RcFile(newest)
  387. rcs=[]
  388. # step number id (001, 002, ...)
  389. find = re.search(r"_([0-9]{3})\.rc$", newest)
  390. if find:
  391. idnb=find.group(1)
  392. else:
  393. print "problem with RE to find jobstep number"
  394. raise Exception
  395. root = os.path.join(rundir,''.join([runid,'_',idnb]))
  396. # last chunk ?
  397. islast = ( orc.get('timerange.end') ==
  398. orc.get('jobstep.timerange.end') )
  399. if islast:
  400. print " checking last jobstep:", newest
  401. else:
  402. print " checking itermediate jobstep:", newest
  403. # check run
  404. #pdb.set_trace()
  405. if self.runtype == 'foreground':
  406. self.rundone = True
  407. if not os.path.exists(self.restart) : raise Exception
  408. timing=self.get_runtime()
  409. else:
  410. # ------ loadleveler @ ECMWF specific (begin) ------
  411. try:
  412. ll=LL(root)
  413. except:
  414. print "Problem with LoadLeveler object init"
  415. raise Exception
  416. ll.check_jobs()
  417. self.rundone = True
  418. print ll.step_get_timing(1)
  419. # ------ loadleveler @ ECMWF specific (end) ------
  420. # IF not using loadleveler at ECMWF, you can comment the
  421. # loadleveler lines above, and uncomment the following ones. That
  422. # just assumes a run never crashes, and wait for last restart:
  423. #while os.path.exists(self.restart) == False: time.sleep(10)
  424. #print " restart found"
  425. #
  426. ## including extra security to make sure restart is closed..
  427. #ok = os.path.join(rundir,'tm5.ok')
  428. #while os.path.exists(ok) == False: time.sleep(10)
  429. def basic_checkrun(self, verbose=True):
  430. """
  431. Check if a run is successfully terminated by checking the
  432. existence of the tm5.ok file and final restart file.
  433. Note this is not bullet proof: if between legs and final restart
  434. remains from a previous run.
  435. """
  436. if verbose: print "basic run check for "+self.filename
  437. if os.path.exists(self.ok) and os.path.exists(self.restart):
  438. if verbose: print "run sucessfully terminated"
  439. if verbose: print "--------------------------"
  440. self.rundone = True
  441. return True
  442. else:
  443. if verbose: print "run not done or crashed"
  444. if verbose: print "--------------------------"
  445. return False
  446. def cleanup(self, full=True, verbose=True):
  447. if verbose: print "cleaning up : ",self.filename
  448. # Minimal
  449. if os.path.isfile(self.restart3) : os.remove(self.restart3)
  450. # Full
  451. if full:
  452. if os.path.isfile(self.restart) : os.remove(self.restart)
  453. if os.path.isfile(self.logcomp) : os.remove(self.logcomp)
  454. # Empty rundir. Output and profiling are left in their
  455. # own directory (if different from the rundir)
  456. rundir = self.get('my.run.dir')
  457. if os.path.exists(rundir):
  458. files = [ os.path.join(rundir, f) for f in os.listdir(rundir) if
  459. os.path.isfile(os.path.join(rundir, f))]
  460. for f in files: os.remove(f)
  461. self.rundone = False
  462. def get_output_list(self):
  463. # restart and log
  464. out=[self.restart, self.logcomp]
  465. out=[f for f in out if os.path.isfile(f)]
  466. # run dir
  467. rundir = self.get('my.run.dir')
  468. if os.path.exists(rundir):
  469. files = [ os.path.join(rundir, f) for f in os.listdir(rundir) if
  470. os.path.isfile(os.path.join(rundir, f))]
  471. out=out+files
  472. # output dir
  473. rundir = self.get('output.dir')
  474. if os.path.exists(rundir):
  475. files = [ os.path.join(rundir, f) for f in os.listdir(rundir) if
  476. os.path.isfile(os.path.join(rundir, f))]
  477. out=out+files
  478. # profile dir
  479. subdir = self.get('timing.output.subdir')
  480. rundir = os.path.join(rundir, subdir)
  481. if os.path.exists(rundir):
  482. files = [ os.path.join(rundir, f) for f in os.listdir(rundir) if
  483. os.path.isfile(os.path.join(rundir, f))]
  484. out=out+files
  485. return out
  486. #////////////////////////////////////////////////////////
  487. # TEST class for TM5
  488. #////////////////////////////////////////////////////////
  489. class tm5_test(object):
  490. def __init__(self, Rcfiles):
  491. if len(Rcfiles) < 2 :
  492. print "pass at least 2 rc files for comparison"
  493. raise Exception
  494. self.rc=[]
  495. for fname in Rcfiles: self.rc.append( rctm5(fname) )
  496. def comp_restart(self, ind=[0,1], clean=False, queue=False):
  497. method=2 # 1 = Unix cmp ; 2 = tools/ttb/bin/ttb_compare.py
  498. # for now, can compare 2 runs only
  499. if len(ind) != 2 :
  500. print "requires 2 file index for comparison"
  501. raise Exception
  502. # same?
  503. if self.rc[ind[0]].restart == self.rc[ind[1]].restart:
  504. print "same path-to-file/restart-file in both cases !"
  505. print "\n SUCCESS"
  506. return 0
  507. # Submit (allows for concomittant runs in the queue)
  508. for k in ind: self.rc[k].run(clean=clean,queue=queue)
  509. # Wait for run end and do post-processing if any
  510. okrun = True
  511. for klm in ind:
  512. try:
  513. self.rc[klm].checkrun()
  514. if (method == 1): self.rc[klm].convert2nc3()
  515. except:
  516. print "\n FAILED run :", self.rc[klm].filename
  517. okrun = False
  518. if not okrun:
  519. print "\n FAILED - comparison aborted"
  520. return 1
  521. # COMPARE
  522. if method == 1:
  523. # METHOD 1 : binary comparison of restart files converted to
  524. # netCDF-3
  525. command = ['cmp', '-s', self.rc[ind[0]].restart3, self.rc[ind[1]].restart3 ]
  526. print "comparing:"
  527. print " ", self.rc[ind[0]].restart3
  528. print " ", self.rc[ind[1]].restart3
  529. retcode = subprocess.call( command )
  530. if method == 2:
  531. # METHOD #2 : if required python libraries are available, you can
  532. # also use TTB's ttb_compare.py module
  533. try :
  534. ttb_compare.df_files( self.rc[ind[0]].restart, self.rc[ind[1]].restart )
  535. retcode=0
  536. except :
  537. retcode=1
  538. if retcode != 0 :
  539. print "\n FAILED"
  540. else:
  541. print "\n SUCCESS"
  542. # minimal cleaning
  543. for rc in self.rc : rc.cleanup(full=False, verbose=False)
  544. return retcode
  545. def teardown(self):
  546. for rc in self.rc : rc.cleanup()
  547. #////////////////////////////////////////////////////////
  548. def testtm( args, new=False, queue=False, tm6=False, teardown=False, mpi=False, restart=False):
  549. narg=len(args)
  550. status = 0 # exit code
  551. if narg == 1:
  552. # Replacing "my.project.dir" key ensures that the build are
  553. # different (the key is mandatory), and that output and restart
  554. # (which are assumed to be defined below that dir), are in
  555. # different locations.
  556. rcobj=rctm5(args[0], raw=True)
  557. if teardown and not (restart or mpi):
  558. print """
  559. Nothing to clean up, no test being specified. Use
  560. also --mpi (-m) or --restart (-r)
  561. """
  562. if restart:
  563. rcno = 'onechunk.rc'
  564. rcyes = 'twochunks.rc'
  565. rcobj.replace('timerange.start', "2006-01-01 00:00:00")
  566. rcobj.replace('timerange.end', "2006-01-02 03:00:00")
  567. rcobj.replace('jobstep.length', 'inf')
  568. rcobj.replace('my.project.dir',' ${my.scratch}/TM5/test/onechunk')
  569. rcobj.WriteFile(rcno)
  570. rcobj.replace('jobstep.length', 1)
  571. rcobj.replace('my.project.dir', '${my.scratch}/TM5/test/twochunks')
  572. rcobj.WriteFile(rcyes)
  573. test = tm5_test( [rcyes, rcno] )
  574. if teardown :
  575. test.teardown()
  576. if os.path.isfile(rcyes): os.remove(rcyes)
  577. if os.path.isfile(rcno): os.remove(rcno)
  578. else:
  579. print "\nCompare add. intermediate restart:\n"
  580. status = status + test.comp_restart( clean=new, queue=queue )
  581. if mpi:
  582. rcnames=['one_proc.rc','four_proc.rc','ten_proc.rc']
  583. rcobj.replace('par.mpi','F')
  584. rcobj.replace('par.ntask',1)
  585. if tm6:
  586. rcobj.replace('par.nx',1)
  587. rcobj.replace('par.ny',1)
  588. rcobj.replace('my.project.dir','${my.scratch}/TM5/test/nompi')
  589. rcobj.WriteFile(rcnames[0])
  590. rcobj.replace('par.mpi','T')
  591. rcobj.replace('par.ntask',4)
  592. if tm6:
  593. rcobj.replace('par.nx',2)
  594. rcobj.replace('par.ny',2)
  595. rcobj.replace('my.project.dir','${my.scratch}/TM5/test/mpi4')
  596. rcobj.WriteFile(rcnames[1])
  597. rcobj.replace('par.mpi','T')
  598. rcobj.replace('par.ntask',10)
  599. if tm6:
  600. rcobj.replace('par.nx',2)
  601. rcobj.replace('par.ny',5)
  602. rcobj.replace('my.project.dir','${my.scratch}/TM5/test/mpi10')
  603. rcobj.WriteFile(rcnames[2])
  604. test = tm5_test( rcnames )
  605. if teardown :
  606. test.teardown()
  607. for rc in filter(lambda x: os.path.isfile(x), rcnames):
  608. os.remove(rc)
  609. else:
  610. print "\nCompare no-mpi and 4-procs-mpi runs:\n"
  611. status = status + test.comp_restart([0,1], clean=new, queue=queue)
  612. print "\nCompare 4-procs-mpi and 10-procs-mpi runs:\n"
  613. status = status + test.comp_restart([1,2], clean=new, queue=queue)
  614. print "\nCompare no-mpi and 10-procs-mpi runs:\n"
  615. status = status + test.comp_restart([0,2], queue=queue)
  616. elif narg == 2:
  617. test = tm5_test(args)
  618. if teardown :
  619. test.teardown()
  620. else:
  621. print "\nCompare \n"+args[0]+"\n and \n"+args[1]+"\n"
  622. status = status + test.comp_restart( clean=new, queue=queue )
  623. else:
  624. print "requires 1 or 2 rc file argument(s), not ", narg
  625. status=1
  626. return status
  627. #////////////////////////////////////////////////////////
  628. if __name__ == "__main__":
  629. parser = OptionParser(usage='%prog [options] rcfile_1 [rcfile_2]')
  630. parser.add_option("-t", "--teardown", action="store_true", dest="teardown",
  631. help="remove restart, files in rundir & compilation log")
  632. parser.add_option("-n", "--new", action="store_true", dest="new",
  633. help="recompile everything fresh (a la realclean)")
  634. parser.add_option("-q", "--queue", action="store_true", dest="queue",
  635. help="submit job to queue instead of foreground")
  636. parser.add_option("-m", "--mpi", action="store_true", dest="mpi",
  637. help="test if different #procs gives same result")
  638. parser.add_option("-r", "--restart", action="store_true", dest="restart",
  639. help="test if cutting a run in two smaller chunks gives the same result")
  640. parser.add_option("-6", "--tm6", action="store_true", dest="tm6",
  641. help="indicates that rc file is for a tm6 model, use only for --mpi test.")
  642. options, args = parser.parse_args()
  643. sys.exit( testtm( args,
  644. new=options.new, queue=options.queue,
  645. tm6=options.tm6, teardown=options.teardown,
  646. mpi=options.mpi, restart=options.restart) )