123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563 |
- #! /usr/bin/env python3
- # -----------------------------------------------
- # help
- # -----------------------------------------------
- """
- NAME
- submit_tm5_step_init
- DESCRIPTION
- Script to setup the run directory for the actual model run:
- o install input files
- o create output directories
- o run special user setup scripts
- INSTALLATION OF INPUT FILES
- Which files should be installed, where to find them, and where to install them
- is all specified in the rcfile.
- Specify a space separated list with install tasks:
- install.tasks : obs diffusion
- For each task, a number of specific settings should be defined somewhere.
- To have just a directory created, only specify the local directory:
- <task>.install.dir : /local/data/
- To have a number of files copied from an archive into this local directory,
- also specify the archive location and a space separated list with file names:
- <task>.install.arch : /archive/data/
- <task>.install.files : krep.dat zooi.tar morezooi.tar
- If a file ends with '.tar' it is unpacked automatically.
- The archive description should be understandable by the 'gss' scripts:
- /archive/data/
- ecfs:/tmx/TM/data # ECMWF tape archive when running on ECMWF computer
- ec:ecfs[tmx]:TM/data # idem from a remote computer using EcAccess tools
- mos:/fa/ks/TM/data # KNMI tape archive
- If the archive consists of more than one directory, specify a space separated list:
- <task>.install.arch : /archive/data/A /archive/data/B
- Both the installation directory, the archive, and the file name might contain
- keys '<yyyy>', '<mm>', and '<dd>' for the year/month/day within the job timerange:
- <task>.install.arch : /archive/data/<yyyy>/<mm>
- <task>.install.files : all_<yyyy><mm>.dat
- <task>.install.dir : /local/data/<yyyy>
- If it is no problem that some files to be installed are not present in the archive,
- use the 'optional' list instead of, or next to, the 'files' list:
- <task>.install.optional : eventually.tar
- If only files should be installed that match a file name pattern, use
- (only works for archives on the local disk yet):
- <task>.install.match : for_all_<yyyy>_*.tar
- On some systems only a temporary scratch disk is present at run time.
- The user should ensure that all input data is present on this local disk.
- A useful UNIX command for this is 'rsync', which synchronizes the content
- of a local directory with another directory. Use the following specification
- for a list of files or directories that need to be present in the 'dir'
- and should be synchronized with the version present in 'arch' :
- <task>.install.rsync : TMtree
- The installation performs a time loop over the 'jobstep.timerange' ;
- the 'dir', 'arch', 'files', 'optional', and 'rsync' values could then include
- special keys '<yyyy>', '<mm>', or '<dd>'
- that are replaced by the year, month, and day values respectively:
- <task>.install.files : zooi_<yyyy><mm>.tar
- CREATE OUTPUT DIRECTORIES
- The main output directory is specified in the rcfile:
- output.dir : /scratch/run/output
- A list of output types specifies the individual outputs:
- output.types : conc station budget
- For each of these types, a sub directory is specified:
- conc.output.subdir : CONC
- station.output.subdir : STATION
- USER SCRIPTS
- Provide a ';' separated list with script commands to be executed:
- input.user.scripts : input.sc ; meteo-setup
- Eventually add arguments; if one of the arguments is '<rcfile>'
- this is replaced by the name of the runtime rcfile:
- input.user.scripts : meteo-setup -v <rcfile>
- """
- # -----------------------------------------------
- # external
- # -----------------------------------------------
- # standard modules:
- import sys
- import os
- import shutil
- import fnmatch
- import optparse
- import logging
- import datetime
- # -----------------------------------------------
- # logging
- # -----------------------------------------------
# Configure the root logger: messages of level INFO and above are written
# to standard output, prefixed with the line number and file name of the
# statement that issued them.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(lineno)-4s:%(filename)-30s [%(levelname)-8s] %(message)s',
)
- # -----------------------------------------------
- # default values
- # -----------------------------------------------
# location of the auxiliary scripts, used when '--bindir' is not specified:
bindir_default = os.curdir

# -----------------------------------------------
# arguments
# -----------------------------------------------

# text for the 'usage' help line:
usage = "%prog <rcfile>"

# initialise the option parser:
parser = optparse.OptionParser(usage=usage)

# define options:
parser.add_option(
    "--bindir",
    help="location of auxilary scripts (%s)" % bindir_default,
    dest="bindir", action="store", default=bindir_default,
)

# now parse the actual arguments;
# return an object 'opts' with the option values (only 'bindir' is defined),
# and the unnamed arguments in the list 'args' :
opts, args = parser.parse_args()

# exactly one positional argument (the rcfile) is required.
# The error is reported unconditionally: no 'verbose' option is defined on the
# parser, so testing 'opts.verbose' here would raise an AttributeError instead
# of printing the usage line.
if len(args) != 1:
    logging.error('single argument command should be specified, found : %i' % len(args))
    parser.print_usage()
    sys.exit(1)

# name of the rcfile with the settings:
rcfile = args[0]
- # -----------------------------------------------
- # toolboxes
- # -----------------------------------------------
# directory holding the auxiliary scripts; taken from the '--bindir' option:
scriptdir = opts.bindir

# search this directory first when importing python modules:
sys.path.insert(0, scriptdir)

# local modules; imported only now that the search path has been extended:
import rc
import go

# -----------------------------------------------
# begin
# -----------------------------------------------

logging.info('start')

# settings read from the rcfile named on the command line:
rcf = rc.RcFile(rcfile)
#
# install tasks
#
# For every task listed in 'install.tasks', loop over the days in the job step
# time range and:
#   o create the installation directory if it does not exist yet;
#   o copy the requested files from the archive(s) using the 'gss' script,
#     decompress and/or untar them where the file extension asks for it;
#   o rsync the requested files/directories from the archive.
# Any failing external command is logged and ends the script with status 1.
#

def _fill_time_keys(template, yyyy, mm, dd):
    """Return 'template' with the keys '<yyyy>', '<mm>' and '<dd>' replaced by the given strings."""
    return template.replace('<yyyy>', yyyy).replace('<mm>', mm).replace('<dd>', dd)


def _log_call_or_exit(command):
    """Run 'command' via 'go.subprocess.log_call'; on any exception log it and exit with status 1."""
    try:
        return go.subprocess.log_call(command)
    except Exception as err:
        logging.error(err)
        sys.exit(1)


def _log_call_in_dir(command, workdir):
    """Run 'command' with 'workdir' as working directory, then change back to the previous one."""
    owd = os.getcwd()
    os.chdir(workdir)
    try:
        _log_call_or_exit(command)
    finally:
        os.chdir(owd)


# time format in rcfiles:
tfmt = '%Y-%m-%d %H:%M:%S'

# step of the time loop: one day
dt = datetime.timedelta(1)

# the archive access script:
gss_script = os.path.join(scriptdir, 'gss')

# programs used to decompress files with these extensions; the names must not
# contain blanks since they are used as first element of a command list:
unzipper = {
    '.gz': 'gunzip',
    '.bz2': 'bunzip2',
    '.Z': 'uncompress',
    '.zip': 'unzip',
}

# info ...
logging.info('install input files if necessary ...')

# list with input types:
install_tasks = rcf.get('install.tasks').split()

# loop over all tasks:
for install_task in install_tasks:

    # info ...
    logging.info(' task %s ...' % install_task)

    # time range of current job step;
    # (a special branch for python <= 2.4.3 was dropped here: this file uses
    #  'except ... as ...' which those versions can not even parse)
    t1 = datetime.datetime.strptime(rcf.get('jobstep.timerange.start'), tfmt)
    t2 = datetime.datetime.strptime(rcf.get('jobstep.timerange.end'), tfmt)

    # processed directories and file names;
    # this is to avoid repeated installation of (monthly?) files in the loop over days:
    processed_dirs = set()
    processed_files = set()
    processed_rsyncs = set()

    # cache of archive listings per archive directory, to list each one only once:
    install_arch_listings = {}

    # time loop:
    t = t1
    while t < t2:

        # short values:
        yyyy, mm, dd = '%4.4i' % t.year, '%2.2i' % t.month, '%2.2i' % t.day

        #
        # * installation directory
        #

        # local directory where files should be installed, time keys replaced:
        install_dir = _fill_time_keys(rcf.get('%s.install.dir' % install_task), yyyy, mm, dd)

        # report each directory only once:
        if install_dir not in processed_dirs:
            logging.info(' install in %s ...' % install_dir)
            processed_dirs.add(install_dir)

        # create if necessary, including parent directories:
        if not os.path.exists(install_dir):
            logging.info(' create ...')
            os.makedirs(install_dir)

        #
        # * install files
        #

        # names of files to be present:
        install_files = rcf.get('%s.install.files' % install_task, default='').split()
        # names of optional files:
        install_optional = rcf.get('%s.install.optional' % install_task, default='').split()
        # names of files matching a pattern:
        install_match = rcf.get('%s.install.match' % install_task, default='').split()
        # collect:
        install_file_templates = install_files + install_optional + install_match

        # not empty ? then start installing files:
        if install_file_templates:

            # archive directories:
            install_archs = rcf.get('%s.install.arch' % install_task).split()

            # loop over all archives where the files might be found;
            # NOTE(review): a file is marked as processed at the first archive
            # where it is tried, and a required file missing there ends the
            # script; so with more than one archive only 'match' patterns seem
            # to work as documented -- TODO confirm intended behaviour.
            for install_arch in install_archs:

                # replace time keys:
                install_arch = _fill_time_keys(install_arch, yyyy, mm, dd)

                # no listing of this archive available yet ?
                if install_arch not in install_arch_listings:
                    # command to list all files in archive directory:
                    command = [gss_script, 'list', install_arch]
                    try:
                        p = go.subprocess.call(command)
                    except Exception as err:
                        logging.error(err)
                        sys.exit(1)
                    # store the listed lines:
                    install_arch_listings[install_arch] = list(p.stdout)

                # loop over files (file patterns) to be installed:
                for install_file_template in install_file_templates:

                    # replace time keys:
                    install_file_thisday = _fill_time_keys(install_file_template, yyyy, mm, dd)

                    # list with (matching?) files:
                    if install_file_template in install_match:
                        # pattern matching ; returned list might be empty:
                        install_file_matches = fnmatch.filter(
                            install_arch_listings[install_arch], install_file_thisday)
                    else:
                        # no pattern matching, just a single file name:
                        install_file_matches = [install_file_thisday]

                    # loop over matching files; might be empty:
                    for install_file in install_file_matches:

                        # already processed ? then try next:
                        if install_file in processed_files:
                            continue
                        # processed now:
                        processed_files.add(install_file)

                        # already present ?
                        if os.path.exists(os.path.join(install_dir, install_file)):
                            logging.info(' found %s ...' % install_file)
                            continue

                        # info ...
                        logging.info(' retrieve %s ...' % os.path.join(install_arch, install_file))

                        # command to check if file in archive exists:
                        command = [gss_script, 'exist', os.path.join(install_arch, install_file)]
                        try:
                            p = go.subprocess.call(command)
                        except go.subprocess.StatusError as err:
                            # warning message:
                            logging.info(' file not found, or error in testing presence in archive ...')
                            # optional anyway ? test the template, not the
                            # expanded name: the 'optional' list holds the
                            # templates, which still include the time keys:
                            if install_file_template in install_optional:
                                logging.warning(' file is optional anyway, continue ...')
                                continue
                            # problem ...
                            logging.error(' Could not install this file, not found in the archive.')
                            logging.error(' If this is not problem, then specify in the rcfile that the files to be installed are optional:')
                            logging.error(' %s.install.optional : %s' % (install_task, install_file_template))
                            sys.exit(1)
                        except Exception as err:
                            logging.error(err)
                            sys.exit(1)

                        # get file from archive:
                        _log_call_or_exit([
                            gss_script, 'copy',
                            os.path.join(install_arch, install_file),
                            os.path.join(install_dir, install_file),
                        ])

                        # decompress if the extension asks for it;
                        # NOTE(review): the '.tar' test below uses the original
                        # name, so e.g. 'x.tar.gz' is decompressed but
                        # presumably not unpacked -- TODO confirm.
                        for ext, program in unzipper.items():
                            if install_file.endswith(ext):
                                logging.info(' unzip ...')
                                _log_call_in_dir([program, install_file], install_dir)

                        # unpack ?
                        if install_file.endswith('.tar'):
                            logging.info(' unpack ...')
                            # do not use the 'tar x -f etc' since some machine have a very old tar command ...
                            _log_call_in_dir(['tar', 'xf', install_file], install_dir)

        #
        # * synchronize directories
        #

        # names of files/directories to be synchronized:
        filedirs = rcf.get('%s.install.rsync' % install_task, default='').split()

        # loop over files:
        for filedir in filedirs:

            # archive directory:
            install_arch = rcf.get('%s.install.arch' % install_task)

            # replace time keys:
            filedir = _fill_time_keys(filedir, yyyy, mm, dd)
            install_arch = _fill_time_keys(install_arch, yyyy, mm, dd)

            # already processed ?
            if filedir in processed_rsyncs:
                continue
            # processed now:
            processed_rsyncs.add(filedir)

            # info ...
            logging.info(' syncronize %s ...' % os.path.join(install_arch, filedir))

            # synchronize into the installation directory:
            _log_call_or_exit(['rsync', '-a', os.path.join(install_arch, filedir), install_dir])

        # next value in timeloop:
        t = t + dt
#
# output directories
#

# main output directory as specified in the rcfile; create it when missing:
output_dir = rcf.get('output.dir')
if not os.path.exists(output_dir):
    logging.info(' create %s ...' % output_dir)
    os.makedirs(output_dir)

# one sub directory per output type:
for output_type in rcf.get('output.types').split():

    # skip types for which '<type>.output' (read as 'bool', default True) is false:
    if not rcf.get('%s.output' % output_type, 'bool', default=True):
        continue

    # name of the sub directory and its full path:
    subdir_name = rcf.get('%s.output.subdir' % output_type, default=None)
    target_dir = os.path.join(output_dir, subdir_name)

    # nothing to do if already present:
    if os.path.exists(target_dir):
        continue

    logging.info(' create %s ...' % target_dir)
    os.makedirs(target_dir)
#
# user scripts
#

# info ...
logging.info('call user scripts if necessary ...')

# ';' separated list with script commands to be called;
# the text 'None' means that nothing is requested:
user_scripts = rcf.get('input.user.scripts')

# any request ?
if user_scripts != 'None':

    # loop over scripts:
    for user_script in user_scripts.split(';'):

        # an empty value, a trailing ';' or a double ';;' leaves a blank
        # entry; skip it rather than starting a shell for an empty command:
        user_script = user_script.strip()
        if not user_script:
            continue

        # info ...
        logging.info(' call script "%s" ...' % user_script)

        # command to call the script; replace some keywords:
        command = user_script.replace('<bindir>', opts.bindir).replace('<rcfile>', rcfile)

        # execute within a shell, never know if there are '*' etc in the command line;
        # the command text comes from the user's own rcfile:
        try:
            p = go.subprocess.watch_call(command, shell=True)
        except Exception as err:
            logging.error(err)
            sys.exit(1)

#
# ok
#

# info ...
logging.info('end')
- # -----------------------------------------------
- # end
- # -----------------------------------------------
|