submit_tm5_step_init 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. #! /usr/bin/env python3
  2. # -----------------------------------------------
  3. # help
  4. # -----------------------------------------------
  5. """
  6. NAME
  7. submit_tm5_step_init
  8. DESCRIPTION
  9. Script to setup the run directory for the actual model run:
  10. o install input files
  11. o create output directories
  12. o run special user setup scripts
  13. INSTALLATION OF INPUT FILES
  14. Which files should be installed, where to find them, and where to install them
  15. is all specified in the rcfile.
  16. Specify a space separated list with install tasks:
  17. install.tasks : obs diffusion
  18. For each task, a number of specific settings should be defined somewhere.
  19. To have just a directory created, only specify the local directory:
  20. <task>.install.dir : /local/data/
  21. To have a number of files copied from an archive into this local directory,
  22. also specify the archive location and a space separated list with file names:
  23. <task>.install.arch : /archive/data/
  24. <task>.install.files : krep.dat zooi.tar morezooi.tar
  25. If a file ends with '.tar' it is unpacked automatically.
  26. The archive description should be understandable by the 'gss' scripts:
  27. /archive/data/
  28. ecfs:/tmx/TM/data # ECMWF tape archive when running on ECMWF computer
  29. ec:ecfs[tmx]:TM/data # idem from a remote computer using EcAccess tools
  30. mos:/fa/ks/TM/data # KNMI tape archive
  31. If the archive consists of more than one directory, specify a space separated list:
  32. <task>.install.arch : /archive/data/A /archive/data/B
  33. Both the installation directory, the archive, and the file name might contain
  34. keys '<yyyy>', '<mm>', and '<dd>' for the year/month/day within the job timerange:
  35. <task>.install.arch : /archive/data/<yyyy>/<mm>
  36. <task>.install.files : all_<yyyy><mm>.dat
  37. <task>.install.dir : /local/data/<yyyy>
  38. If it is not a problem that some of the files to be installed are not present in the archive,
  39. list them in the 'optional' list instead of (or next to) the 'files' list:
  40. <task>.install.optional : eventually.tar
  41. If only files should be installed that match a file name pattern, use
  42. (only works for archives on the local disk yet):
  43. <task>.install.match : for_all_<yyyy>_*.tar
  44. On some systems only a temporary scratch disk is present at run time.
  45. The user should ensure that all input data is present on this local disk.
  46. A useful UNIX command for this is 'rsync', which synchronizes the content
  47. of a local directory with another directory. Use the following specification
  48. for a list of files or directories that need to be present in the 'dir'
  49. and should be synchronized with the version present in 'arch' :
  50. <task>.install.rsync : TMtree
  51. The installation performs a time loop over the 'jobstep.timerange' ;
  52. the 'dir', 'arch', 'files', 'optional', and 'rsync' values could then include
  53. special keys '<yyyy>', '<mm>', or '<dd>'
  54. that are replaced by the year, month, and day values respectively:
  55. <task>.install.files : zooi_<yyyy><mm>.tar
  56. CREATE OUTPUT DIRECTORIES
  57. The main output directory is specified in the rcfile:
  58. output.dir : /scratch/run/output
  59. A list of output types specifies the individual outputs:
  60. output.types : conc station budget
  61. For each of these types, a sub directory is specified:
  62. conc.output.subdir : CONC
  63. station.output.subdir : STATION
  64. USER SCRIPTS
  65. Provide a ';' separated list with script commands to be executed:
  66. input.user.scripts : input.sc ; meteo-setup
  67. Optionally add arguments; if one of the arguments is '<rcfile>'
  68. this is replaced by the name of the runtime rcfile:
  69. input.user.scripts : meteo-setup -v <rcfile>
  70. """
  71. # -----------------------------------------------
  72. # external
  73. # -----------------------------------------------
  74. # standard modules:
  75. import sys
  76. import os
  77. import shutil
  78. import fnmatch
  79. import optparse
  80. import logging
  81. import datetime
  82. # -----------------------------------------------
  83. # logging
  84. # -----------------------------------------------
  85. # setup messages:
  86. logging.basicConfig( format='%(lineno)-4s:%(filename)-30s [%(levelname)-8s] %(message)s', level=logging.INFO, stream=sys.stdout )
  87. # -----------------------------------------------
  88. # default values
  89. # -----------------------------------------------
  90. # location of auxilary scripts:
  91. bindir_default = os.curdir
  92. # -----------------------------------------------
  93. # arguments
  94. # -----------------------------------------------
  95. # set text for 'usage' help line:
  96. usage = "%prog <rcfile>"
  97. # initialise the option parser:
  98. parser = optparse.OptionParser(usage=usage)
  99. # define options:
  100. parser.add_option( "--bindir",
  101. help="location of auxilary scripts (%s)" % bindir_default,
  102. dest="bindir", action="store", default=bindir_default )
  103. # now parse the actual arguments;
  104. # return an object 'opts' with fields 'verbose' etc,
  105. # and the unnamed arguments in the list 'args' :
  106. opts,args = parser.parse_args()
  107. # only one argument ...
  108. if len(args) != 1 :
  109. if opts.verbose : logging.error( 'single argument command should be specified, found : %i' % len(args) )
  110. parser.print_usage()
  111. sys.exit(1)
  112. #endif
  113. # extract ...
  114. rcfile = args[0]
  115. # -----------------------------------------------
  116. # toolboxes
  117. # -----------------------------------------------
  118. # location of scripts:
  119. scriptdir = opts.bindir
  120. # prepend locations of python modules to search path:
  121. sys.path.insert( 0, scriptdir )
  122. # local modules:
  123. import rc
  124. import go
  125. # -----------------------------------------------
  126. # begin
  127. # -----------------------------------------------
  128. # info ...
  129. logging.info( 'start' )
  130. # read settings:
  131. rcf = rc.RcFile( rcfile )
  132. #
  133. # install tasks
  134. #
  135. # info ...
  136. logging.info( 'install input files if necessary ...' )
  137. # list with input types:
  138. install_tasks = rcf.get( 'install.tasks' ).split()
  139. # loop over all tasks:
  140. for install_task in install_tasks :
  141. # info ...
  142. logging.info( ' task %s ...' % install_task )
  143. # time format in rcfiles:
  144. tfmt = '%Y-%m-%d %H:%M:%S'
  145. # time range of current job step:
  146. if sys.version_info[0:3] <= (2,4,3) :
  147. t1 = datetime.datetime( *map(int,rcf.get('jobstep.timerange.start').replace('-',' ').replace(':',' ').split()) )
  148. t2 = datetime.datetime( *map(int,rcf.get('jobstep.timerange.end' ).replace('-',' ').replace(':',' ').split()) )
  149. else :
  150. t1 = datetime.datetime.strptime( rcf.get('jobstep.timerange.start'), tfmt )
  151. t2 = datetime.datetime.strptime( rcf.get('jobstep.timerange.end' ), tfmt )
  152. #endif
  153. # loop over days:
  154. dt = datetime.timedelta(1) # days
  155. # list with processed directories and file names;
  156. # this is to avoid repeated installation of (monthly?) files in the loop over days:
  157. processed_dirs = []
  158. processed_files = []
  159. processed_rsyncs = []
  160. # store directory listings, this will save a lot of time if a remote directories have to be listed:
  161. install_arch_listings = {}
  162. # time loop:
  163. t = t1
  164. while t < t2 :
  165. ## info ...
  166. #logging.info( ' timeloop: %s' % str(t) )
  167. # short values:
  168. yyyy,mm,dd = '%4.4i' % t.year, '%2.2i' % t.month, '%2.2i' % t.day
  169. #
  170. # * installation directory
  171. #
  172. # local directory where install should be installed:
  173. install_dir = rcf.get( '%s.install.dir' % install_task )
  174. # replace time keys:
  175. install_dir = install_dir.replace('<yyyy>',yyyy).replace('<mm>',mm).replace('<dd>',dd)
  176. # not processed yet ?
  177. if install_dir not in processed_dirs :
  178. # info ...
  179. logging.info( ' install in %s ...' % install_dir )
  180. # add to list to avoid new messages:
  181. processed_dirs.append(install_dir)
  182. #endif
  183. # create if necessary:
  184. if not os.path.exists(install_dir) :
  185. # info ...
  186. logging.info( ' create ...' )
  187. # create including parent directories:
  188. os.makedirs(install_dir)
  189. #endif
  190. #
  191. # * install files
  192. #
  193. # names of files to be present:
  194. install_files = rcf.get( '%s.install.files' % install_task, default='' ).split()
  195. # names of optional files:
  196. install_optional = rcf.get( '%s.install.optional' % install_task, default='' ).split()
  197. # names of files matching a pattern:
  198. install_match = rcf.get( '%s.install.match' % install_task, default='' ).split()
  199. # collect:
  200. install_file_templates = install_files + install_optional + install_match
  201. # not empty ? then start installing files:
  202. if len(install_file_templates) > 0 :
  203. # archive directories:
  204. install_archs = rcf.get( '%s.install.arch' % install_task ).split()
  205. # loop over all archives where the files might be found:
  206. for install_arch in install_archs :
  207. # replace time keys:
  208. install_arch = install_arch.replace('<yyyy>',yyyy).replace('<mm>',mm).replace('<dd>',dd)
  209. ## info ...
  210. #logging.info( 'list %s ...' % install_arch )
  211. # no listing of this archive available yet ?
  212. if install_arch not in install_arch_listings.keys() :
  213. # command to list all files in archive directory:
  214. command = [ os.path.join(scriptdir,'gss'), 'list', install_arch ]
  215. # execute:
  216. try :
  217. p = go.subprocess.call( command )
  218. except Exception as err :
  219. logging.error( err )
  220. sys.exit(1)
  221. #endtry
  222. # extract files:
  223. install_arch_files = []
  224. for line in p.stdout : install_arch_files.append(line)
  225. # store:
  226. install_arch_listings[install_arch] = install_arch_files
  227. #endfor
  228. # loop over files (file patterns) to be installed:
  229. for install_file_template in install_file_templates :
  230. # replace time keys:
  231. install_file_thisday = install_file_template.replace('<yyyy>',yyyy).replace('<mm>',mm).replace('<dd>',dd)
  232. # list with (matching?) files:
  233. if install_file_template in install_match :
  234. # pattern matchin ; returned list might be empty:
  235. install_file_matches = fnmatch.filter( install_arch_listings[install_arch], install_file_thisday )
  236. else :
  237. # no pattern matching, just a single filename which should be installed without restrictions:
  238. install_file_matches = [ install_file_thisday ]
  239. #endif
  240. # loop over matching files; might be empty:
  241. for install_file in install_file_matches :
  242. ## info ...
  243. #logging.info( ' timeloop: current file: %s' % install_file )
  244. # already processed ?
  245. if install_file in processed_files :
  246. ## info ...
  247. #logging.info( ' timeloop: already processed ; try next ...' )
  248. # try next:
  249. continue
  250. #endif
  251. # processed now, so add to list:
  252. processed_files.append(install_file)
  253. # already present ?
  254. if os.path.exists(os.path.join(install_dir,install_file)) :
  255. # info ...
  256. logging.info( ' found %s ...' % install_file )
  257. # next:
  258. continue
  259. #endif
  260. # info ...
  261. logging.info( ' retrieve %s ...' % os.path.join(install_arch,install_file) )
  262. # command to check if file in archive extists:
  263. command = [ os.path.join(scriptdir,'gss'), 'exist', os.path.join(install_arch,install_file) ]
  264. # execute:
  265. try :
  266. p = go.subprocess.call( command )
  267. except go.subprocess.StatusError as err :
  268. # warning message:
  269. logging.info( ' file not found, or error in testing presence in archive ...' )
  270. # optional anyway ?
  271. if install_file in install_optional :
  272. logging.warning( ' file is optional anyway, continue ...' )
  273. continue
  274. #endif
  275. # problem ...
  276. logging.error( ' Could not install this file, not found in the archive.' )
  277. logging.error( ' If this is not problem, then specify in the rcfile that the files to be installed are optional:' )
  278. logging.error( ' %s.install.optional : %s' % (install_task,install_file_template) )
  279. sys.exit(1)
  280. except Exception as err :
  281. logging.error( err )
  282. sys.exit(1)
  283. #endtry
  284. # command to get file from archive:
  285. command = [ os.path.join(scriptdir,'gss'), 'copy', os.path.join(install_arch,install_file), os.path.join(install_dir,install_file) ]
  286. # execute:
  287. try :
  288. p = go.subprocess.log_call( command )
  289. except Exception as err :
  290. logging.error( err )
  291. sys.exit(1)
  292. #endtry
  293. # check for extensions .gz etc; define corresponding unzippers:
  294. unzipper = { '.gz' : 'gunzip',
  295. '.bz2' : 'bunzip2',
  296. '.Z' : 'uncompress',
  297. '.zip' : 'unzip ' }
  298. # loop over all supported extensions:
  299. for ext in unzipper.keys() :
  300. # file has this extension ?
  301. if install_file.endswith(ext) :
  302. # info ...
  303. logging.info( ' unzip ...' )
  304. # goto input directory:
  305. owd = os.getcwd()
  306. os.chdir( install_dir )
  307. # command to unzip the file:
  308. command = [ unzipper[ext], install_file ]
  309. # execute:
  310. try :
  311. p = go.subprocess.log_call( command )
  312. except Exception as err :
  313. logging.error( err )
  314. sys.exit(1)
  315. #endtry
  316. # back ...
  317. os.chdir( owd )
  318. #endif
  319. #endfor
  320. # unpack ?
  321. if install_file.endswith('.tar') :
  322. # info ...
  323. logging.info( ' unpack ...' )
  324. # goto input directory:
  325. owd = os.getcwd()
  326. os.chdir( install_dir )
  327. # command to unpack the file;
  328. # do not use the 'tar x -f etc' since some machine have a very old tar command ...
  329. command = [ 'tar', 'xf', install_file ]
  330. # execute:
  331. try :
  332. p = go.subprocess.log_call( command )
  333. except Exception as err :
  334. logging.error( err )
  335. sys.exit(1)
  336. #endtry
  337. # back ...
  338. os.chdir( owd )
  339. #endif
  340. #endfor # matching input files
  341. #endfor # input file templates
  342. #endfor # input archives
  343. #endfor # files to be installed ?
  344. #
  345. # * synchronize directories
  346. #
  347. # names of files/directories to be synchronized:
  348. filedirs = rcf.get( '%s.install.rsync' % install_task, default='' ).split()
  349. # loop over files:
  350. for filedir in filedirs :
  351. # archive directory:
  352. install_arch = rcf.get( '%s.install.arch' % install_task )
  353. # replace time keys:
  354. filedir = filedir.replace('<yyyy>',yyyy).replace('<mm>',mm).replace('<dd>',dd)
  355. install_arch = install_arch.replace('<yyyy>',yyyy).replace('<mm>',mm).replace('<dd>',dd)
  356. # already processed ?
  357. if filedir in processed_rsyncs : continue
  358. # processed now; add to list:
  359. processed_rsyncs.append( filedir )
  360. # info ...
  361. logging.info( ' syncronize %s ...' % os.path.join(install_arch,filedir) )
  362. # syncronization command:
  363. command = [ 'rsync', '-a', os.path.join(install_arch,filedir), install_dir ]
  364. # execute:
  365. try :
  366. p = go.subprocess.log_call( command )
  367. except Exception as err :
  368. logging.error( err )
  369. sys.exit(1)
  370. #endtry
  371. #endfor # files/directories
  372. #
  373. # *
  374. #
  375. # next value in timeloop:
  376. t = t + dt
  377. #endwhile # time loop
  378. #endfor # input tasks
  379. #
  380. # output directories
  381. #
  382. # create output directory:
  383. output_dir = rcf.get('output.dir')
  384. if not os.path.exists(output_dir) :
  385. logging.info( ' create %s ...' % output_dir )
  386. os.makedirs(output_dir)
  387. #endif
  388. # create output subdirectories:
  389. for typ in rcf.get('output.types').split() :
  390. # enabled ?
  391. flag = rcf.get( typ+'.output', 'bool', default=True )
  392. if flag :
  393. # read name of subdirectory:
  394. subdir = rcf.get( typ+'.output.subdir', default=None )
  395. # full path:
  396. mdir = os.path.join( output_dir, subdir )
  397. if not os.path.exists(mdir) :
  398. logging.info( ' create %s ...' % mdir )
  399. os.makedirs(mdir)
  400. #endif
  401. #endif
  402. #endfor
  403. #
  404. # user scripts
  405. #
  406. # info ...
  407. logging.info( 'call user scripts if necessary ...' )
  408. # list with other scripts to be called:
  409. user_scripts = rcf.get( 'input.user.scripts' )
  410. # any request ?
  411. if user_scripts != 'None' :
  412. # loop over sripts:
  413. for user_script in user_scripts.split(';') :
  414. # info ...
  415. logging.info( ' call script "%s" ...' % user_script )
  416. # command to call the script; replace some keywords:
  417. command = user_script
  418. command = command.replace('<bindir>',opts.bindir)
  419. command = command.replace('<rcfile>',rcfile)
  420. # execute within a shell, never know if there are '*' etc in the command line:
  421. try :
  422. p = go.subprocess.watch_call( command, shell=True )
  423. except Exception as err :
  424. logging.error( err )
  425. sys.exit(1)
  426. #endtry
  427. #endfor # user uscripts
  428. #endif # list of scripts specified
  429. #
  430. # ok
  431. #
  432. # info ...
  433. logging.info( 'end' )
  434. # -----------------------------------------------
  435. # end
  436. # -----------------------------------------------