#! /usr/bin/env python3

# -----------------------------------------------
# help
# -----------------------------------------------

"""
NAME
    submit_tm5_step_init

DESCRIPTION
    Script to set up the run directory for the actual model run:
      o install input files
      o create output directories
      o run special user setup scripts

INSTALLATION OF INPUT FILES

    Which files should be installed, where to find them, and where to
    install them is all specified in the rcfile.

    Specify a space separated list with install tasks:

        install.tasks            :  obs diffusion

    For each task, a number of specific settings should be defined.
    To have just a directory created, only specify the local directory:

        <task>.install.dir       :  /local/data/

    To have a number of files copied from an archive into this local
    directory, also specify the archive location and a space separated
    list with file names:

        <task>.install.arch      :  /archive/data/
        <task>.install.files     :  krep.dat zooi.tar morezooi.tar

    If a file name ends with '.tar' it is unpacked automatically.

    The archive description should be understandable by the 'gss' scripts:

        /archive/data/           # plain directory path
        ecfs:/tmx/TM/data        # ECMWF tape archive when running on ECMWF computer
        ec:ecfs[tmx]:TM/data     # idem from a remote computer using EcAccess tools
        mos:/fa/ks/TM/data       # KNMI tape archive

    If the archive consists of more than one directory, specify a
    space separated list:

        <task>.install.arch      :  /archive/data/A /archive/data/B

    The installation directory, the archive, and the file names might
    contain the keys '<yyyy>', '<mm>', and '<dd>' for the year/month/day
    within the job time range:

        <task>.install.arch      :  /archive/data/<yyyy>/<mm>
        <task>.install.files     :  all_<yyyy><mm><dd>.dat
        <task>.install.dir       :  /local/data/<yyyy>

    If it is no problem that some files to be installed are not present
    in the archive, use the 'optional' list instead of, or next to, the
    'files' list:

        <task>.install.optional  :  eventually.tar

    If only files should be installed that match a file name pattern,
    use (only works for archives on the local disk yet):

        <task>.install.match     :  for_all_<yyyy>_*.tar

    On some systems only a temporary scratch disk is present at run time.
    The user should ensure that all input data is present on this local
    disk. A useful UNIX command for this is 'rsync', which synchronizes
    the content of a local directory with another directory.
    Use the following specification for a list of files or directories
    that need to be present in the 'dir' and should be synchronized with
    the version present in 'arch' :

        <task>.install.rsync     :  TMtree

    The installation performs a time loop over the 'jobstep.timerange' ;
    the 'dir', 'arch', 'files', 'optional', and 'rsync' values could then
    include the special keys '<yyyy>', '<mm>', or '<dd>' that are replaced
    by the year, month, and day values respectively:

        <task>.install.files     :  zooi_<yyyy><mm>.tar

CREATE OUTPUT DIRECTORIES

    The main output directory is specified in the rcfile:

        output.dir               :  /scratch/run/output

    A list of output types specifies the individual outputs:

        output.types             :  conc station budget

    For each of these types, a sub directory is specified:

        conc.output.subdir       :  CONC
        station.output.subdir    :  STATION

USER SCRIPTS

    Provide a ';' separated list with script commands to be executed:

        input.user.scripts       :  input.sc ; meteo-setup

    Eventually add arguments; if one of the arguments is '<rcfile>'
    this is replaced by the name of the runtime rcfile
    (and '<bindir>' by the value of the --bindir option):

        input.user.scripts       :  meteo-setup -v <rcfile>
"""

# -----------------------------------------------
# external
# -----------------------------------------------

# standard modules:
import sys
import os
import shutil
import fnmatch
import optparse
import logging
import datetime

# -----------------------------------------------
# logging
# -----------------------------------------------

# setup messages:
logging.basicConfig(
    format='%(lineno)-4s:%(filename)-30s [%(levelname)-8s] %(message)s',
    level=logging.INFO,
    stream=sys.stdout,
)

# -----------------------------------------------
# default values
# -----------------------------------------------

# location of auxilary scripts:
bindir_default = os.curdir

# time format in rcfiles:
TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# tools used to decompress retrieved files, by file name extension;
# these are executable names, so no surrounding white space is allowed:
UNZIPPERS = {
    '.gz': 'gunzip',
    '.bz2': 'bunzip2',
    '.Z': 'uncompress',
    '.zip': 'unzip',
}

# -----------------------------------------------
# arguments
# -----------------------------------------------

# set text for 'usage' help line:
usage = "%prog <rcfile>"

# initialise the option parser:
parser = optparse.OptionParser(usage=usage)

# define options:
parser.add_option(
    "--bindir",
    help="location of auxilary scripts (%s)" % bindir_default,
    dest="bindir",
    action="store",
    default=bindir_default,
)

# now parse the actual arguments;
# return an object 'opts' with the option values,
# and the unnamed arguments in the list 'args' :
opts, args = parser.parse_args()

# only one argument ...
if len(args) != 1:
    # no 'verbose' option is defined, so always report the problem:
    logging.error('single argument command should be specified, found : %i' % len(args))
    parser.print_usage()
    sys.exit(1)

# extract ...
rcfile = args[0]

# -----------------------------------------------
# toolboxes
# -----------------------------------------------

# location of scripts:
scriptdir = opts.bindir

# prepend locations of python modules to search path:
sys.path.insert(0, scriptdir)

# local modules (found via the search path extended above):
import rc
import go

# -----------------------------------------------
# helpers
# -----------------------------------------------


def _expand_time_keys(text, yyyy, mm, dd):
    """Return text with the keys '<yyyy>', '<mm>', '<dd>' replaced by the given strings."""
    return text.replace('<yyyy>', yyyy).replace('<mm>', mm).replace('<dd>', dd)


def _call_or_exit(runner, command, **kwargs):
    """Run command through runner; on any exception log it and exit with status 1."""
    try:
        return runner(command, **kwargs)
    except Exception as err:
        logging.error(err)
        sys.exit(1)


def _log_call_in_dir(command, workdir):
    """Run command via go.subprocess.log_call with workdir as current directory."""
    owd = os.getcwd()
    os.chdir(workdir)
    try:
        _call_or_exit(go.subprocess.log_call, command)
    finally:
        # back ...
        os.chdir(owd)


# -----------------------------------------------
# begin
# -----------------------------------------------

# info ...
logging.info('start')

# read settings:
rcf = rc.RcFile(rcfile)

# location of the archive tool:
gss = os.path.join(scriptdir, 'gss')

#
# install tasks
#

# info ...
logging.info('install input files if necessary ...')

# list with input types:
install_tasks = rcf.get('install.tasks').split()

# loop over all tasks:
for install_task in install_tasks:

    # info ...
    logging.info(' task %s ...' % install_task)

    # time range of current job step:
    t1 = datetime.datetime.strptime(rcf.get('jobstep.timerange.start'), TIME_FORMAT)
    t2 = datetime.datetime.strptime(rcf.get('jobstep.timerange.end'), TIME_FORMAT)

    # loop over days:
    dt = datetime.timedelta(1)  # days

    # processed directories and file names;
    # this is to avoid repeated installation of (monthly?) files in the loop over days:
    processed_dirs = set()
    processed_files = set()
    processed_rsyncs = set()

    # store directory listings, this saves a lot of time
    # if remote directories have to be listed:
    install_arch_listings = {}

    # time loop:
    t = t1
    while t < t2:

        # short values:
        yyyy, mm, dd = '%4.4i' % t.year, '%2.2i' % t.month, '%2.2i' % t.day

        #
        # * installation directory
        #

        # local directory where files should be installed, with time keys replaced:
        install_dir = _expand_time_keys(
            rcf.get('%s.install.dir' % install_task), yyyy, mm, dd)

        # not processed yet ?
        if install_dir not in processed_dirs:
            # info ...
            logging.info(' install in %s ...' % install_dir)
            # remember, to avoid new messages:
            processed_dirs.add(install_dir)

        # create if necessary:
        if not os.path.exists(install_dir):
            # info ...
            logging.info(' create ...')
            # create including parent directories:
            os.makedirs(install_dir)

        #
        # * install files
        #

        # names of files to be present:
        install_files = rcf.get('%s.install.files' % install_task, default='').split()
        # names of optional files:
        install_optional = rcf.get('%s.install.optional' % install_task, default='').split()
        # names of files matching a pattern:
        install_match = rcf.get('%s.install.match' % install_task, default='').split()

        # collect:
        install_file_templates = install_files + install_optional + install_match

        # not empty ? then start installing files:
        if install_file_templates:

            # archive directories:
            install_archs = rcf.get('%s.install.arch' % install_task).split()

            # loop over all archives where the files might be found:
            for install_arch in install_archs:

                # replace time keys:
                install_arch = _expand_time_keys(install_arch, yyyy, mm, dd)

                # no listing of this archive available yet ?
                if install_arch not in install_arch_listings:
                    # list all files in archive directory:
                    listing = _call_or_exit(
                        go.subprocess.call, [gss, 'list', install_arch])
                    # store the listed names:
                    install_arch_listings[install_arch] = list(listing.stdout)

                # loop over files (file patterns) to be installed:
                for install_file_template in install_file_templates:

                    # replace time keys:
                    install_file_thisday = _expand_time_keys(
                        install_file_template, yyyy, mm, dd)

                    # list with (matching?) files:
                    if install_file_template in install_match:
                        # pattern matching ; returned list might be empty:
                        install_file_matches = fnmatch.filter(
                            install_arch_listings[install_arch], install_file_thisday)
                    else:
                        # no pattern matching, just a single file name
                        # which should be installed without restrictions:
                        install_file_matches = [install_file_thisday]

                    # loop over matching files; might be empty:
                    for install_file in install_file_matches:

                        # already processed ? then try next:
                        if install_file in processed_files:
                            continue
                        # processed now, so remember:
                        processed_files.add(install_file)

                        # already present ?
                        if os.path.exists(os.path.join(install_dir, install_file)):
                            # info ...
                            logging.info(' found %s ...' % install_file)
                            # next:
                            continue

                        # full name in archive:
                        arch_file = os.path.join(install_arch, install_file)

                        # info ...
                        logging.info(' retrieve %s ...' % arch_file)

                        # check if file in archive exists:
                        try:
                            go.subprocess.call([gss, 'exist', arch_file])
                        except go.subprocess.StatusError:
                            # warning message:
                            logging.info(' file not found, or error in testing presence in archive ...')
                            # optional anyway ? the 'optional' list holds templates,
                            # so test the template and not only the expanded name:
                            if (install_file_template in install_optional
                                    or install_file in install_optional):
                                logging.warning(' file is optional anyway, continue ...')
                                continue
                            # problem ...
                            logging.error(' Could not install this file, not found in the archive.')
                            logging.error(' If this is not problem, then specify in the rcfile that the files to be installed are optional:')
                            logging.error(' %s.install.optional : %s' % (install_task, install_file_template))
                            sys.exit(1)
                        except Exception as err:
                            logging.error(err)
                            sys.exit(1)

                        # get file from archive:
                        _call_or_exit(
                            go.subprocess.log_call,
                            [gss, 'copy', arch_file, os.path.join(install_dir, install_file)])

                        # check for extensions .gz etc and call corresponding unzipper:
                        for ext, unzip_tool in UNZIPPERS.items():
                            # file has this extension ?
                            if install_file.endswith(ext):
                                # info ...
                                logging.info(' unzip ...')
                                # unzip within the input directory:
                                _log_call_in_dir([unzip_tool, install_file], install_dir)

                        # unpack ?
                        if install_file.endswith('.tar'):
                            # info ...
                            logging.info(' unpack ...')
                            # unpack within the input directory;
                            # do not use the 'tar x -f etc' since some machines
                            # have a very old tar command ...
                            _log_call_in_dir(['tar', 'xf', install_file], install_dir)

        #
        # * synchronize directories
        #

        # names of files/directories to be synchronized:
        filedirs = rcf.get('%s.install.rsync' % install_task, default='').split()

        # loop over files:
        for filedir in filedirs:

            # archive directory:
            install_arch = rcf.get('%s.install.arch' % install_task)

            # replace time keys:
            filedir = _expand_time_keys(filedir, yyyy, mm, dd)
            install_arch = _expand_time_keys(install_arch, yyyy, mm, dd)

            # already processed ?
            if filedir in processed_rsyncs:
                continue
            # processed now; remember:
            processed_rsyncs.add(filedir)

            # source location:
            rsync_source = os.path.join(install_arch, filedir)

            # info ...
            logging.info(' syncronize %s ...' % rsync_source)

            # synchronize:
            _call_or_exit(
                go.subprocess.log_call, ['rsync', '-a', rsync_source, install_dir])

        #
        # *
        #

        # next value in timeloop:
        t = t + dt

#
# output directories
#

# create output directory:
output_dir = rcf.get('output.dir')
if not os.path.exists(output_dir):
    logging.info(' create %s ...' % output_dir)
    os.makedirs(output_dir)

# create output subdirectories:
for typ in rcf.get('output.types').split():
    # enabled ?
    flag = rcf.get(typ + '.output', 'bool', default=True)
    if not flag:
        continue
    # read name of subdirectory:
    subdir = rcf.get(typ + '.output.subdir', default=None)
    # nothing to create without a name (os.path.join would fail on None):
    if subdir is None:
        continue
    # full path:
    mdir = os.path.join(output_dir, subdir)
    if not os.path.exists(mdir):
        logging.info(' create %s ...' % mdir)
        os.makedirs(mdir)

#
# user scripts
#

# info ...
logging.info('call user scripts if necessary ...')

# list with other scripts to be called:
user_scripts = rcf.get('input.user.scripts')

# any request ? (the value 'None' in the rcfile means: no scripts)
if user_scripts != 'None':
    # loop over scripts:
    for user_script in user_scripts.split(';'):
        # info ...
        logging.info(' call script "%s" ...' % user_script)
        # command to call the script; replace some keywords:
        command = user_script
        command = command.replace('<bindir>', opts.bindir)
        command = command.replace('<rcfile>', rcfile)
        # execute within a shell, never know if there are '*' etc in the command line:
        _call_or_exit(go.subprocess.watch_call, command, shell=True)

#
# ok
#

# info ...
logging.info('end')

# -----------------------------------------------
# end
# -----------------------------------------------