#! /usr/bin/env python

doc = """
Verify the checksum of the data sets used for a run.

The data location is retrieved from either a run directory (-r) or a
config-run.xml file, which is parsed for a specific platform (-p).

If you do not specify a run directory, a config-run.xml file is used to find
the data. The following are tried:

    ../classic/config-run.xml (relative to where the script is called)
    ../classic/config-run.xml (relative to where the script is located)

You can specify the path and/or the filename of the config file. And if you
do not specify a platform to read it, a list of available platforms is printed.

Examples:

    # uses a rundir
    ece-data-check.py -r /scratch/ms/nl/nm6/ECEARTH-RUNS/Z6Yd

    # uses a config file
    ece-data-check.py               # print list of available platforms in default config-run.xml
    ece-data-check.py -p cca-intel  # use cca-intel platform info from default config-run.xml
"""

from xml.etree import ElementTree, ElementInclude
import xml.sax
from io import BytesIO, StringIO
import hashlib
import re
import os
import sys
import glob
import argparse

# One line of an md5 file: a 32-character sum, one or more spaces, a file
# name.  Lines starting with '#' never match, so comments are skipped.
MD5_LINE_RE = re.compile(r"([^#]{32}) +(.+)")

# Name of the file whose resolved location reveals INI_DATA_DIR in a run dir.
_RUNDIR_PROBE = 'cf_name_table.txt'


def md5checksum(filepath):
    """Return the hexadecimal MD5 digest of the file at *filepath*.

    The file is read in 8 KiB chunks so large data sets do not have to fit
    in memory.  Errors from ``open`` (missing file, permissions, ...) are
    propagated to the caller.
    """
    digest = hashlib.md5()
    with open(filepath, 'rb') as fh:
        for chunk in iter(lambda: fh.read(8192), b''):
            digest.update(chunk)
    return digest.hexdigest()


def checksumfile(md5file, dataloc, verbose=False):
    """Check the files listed in *md5file* against their recorded MD5 sums.

    md5file -- path of a text file with ``<md5sum> <relative path>`` lines
    dataloc -- directory the relative paths are resolved against
    verbose -- when true, also report files whose sum matches

    One line is printed per file: ``<name> : DIFFER`` for a mismatch,
    ``<name> : MISSING`` when the file does not exist and, only if *verbose*,
    ``<name> : OK``.  Nothing is returned.
    """
    # -- read md5sums (a file name listed twice keeps its last sum)
    checksum = {}
    with open(md5file, 'r') as fobj:
        for line in fobj:
            ma = MD5_LINE_RE.match(line)
            if ma:
                checksum[ma.group(2)] = ma.group(1)

    # -- check the sums
    for name, expected in checksum.items():
        qf = os.path.join(dataloc, name)
        if not os.path.exists(qf):
            print("{} : MISSING".format(name))
        elif md5checksum(qf) != expected:
            print("{} : DIFFER".format(name))
        elif verbose:
            print("{} : OK".format(name))


def check_ini_data_dir(verb=False, rdir=None, platform='unknown-platform',
                       classic=None, config="config-run.xml"):
    """Find INI_DATA_DIR according to RUNDIR or config-run.xml, then verify
    the MD5 sums read from the ``*.md5`` files next to this script.

    verb     -- report every verified file, not only the errors
    rdir     -- run directory; when given, the config file is not consulted
    platform -- name of the <Platform> element to read in the config file
    classic  -- directory holding the config file (default: the 'classic'
                directory next to the directory of this script)
    config   -- config file name, or path, joined onto *classic*

    Returns 1 when the data directory could not be determined or does not
    exist, and None otherwise (checksum mismatches are only printed).

    Side effect: when the config file is used, the working directory is
    changed to the directory of that file and is not restored.
    """
    # -- Dir with the sums files
    sumdir = os.path.dirname(os.path.realpath(__file__))

    if rdir:
        # -- Find INI DATA DIR from a link to it in the rundir
        testfile = os.path.realpath(os.path.join(rdir, _RUNDIR_PROBE))
        datadir = testfile.replace('/oasis/' + _RUNDIR_PROBE, '')
    else:
        # -- Find INI DATA DIR from classic/config-run.xml
        if not classic:
            classic = os.path.join(os.path.dirname(sumdir), 'classic')
            if not os.path.exists(classic):
                classic = os.path.join(os.path.dirname(sumdir),
                                       'no-classic-runtime-here')

        # Absolute path: the chdir below must not change what a relative
        # 'classic' or 'config' refers to.
        cnfg = os.path.abspath(os.path.join(classic, config))

        # ElementInclude's default loader opens included files with a plain
        # open(), i.e. relative to the working directory, hence the chdir.
        try:
            os.chdir(os.path.dirname(cnfg))
        except OSError:
            print("*EE* Could not open file: {}".format(cnfg))
            return 1

        # parse
        try:
            tree = ElementTree.parse(cnfg)
        except IOError:
            print("*EE* Could not open file: {}".format(cnfg))
            return 1
        except ElementTree.ParseError as v:
            row, column = v.position
            print("*EE* error on row {} column {} : {}".format(row, column, v))
            return 1
        except Exception:
            print("*EE* Could not parse XML file: {}".format(cnfg))
            return 1

        root = tree.getroot()
        try:
            ElementInclude.include(root)
            # Run the expanded document through a SAX parser as well.
            # NOTE(review): no content handler is installed, so this looks
            # like a pure well-formedness check -- confirm it is still wanted.
            xml.sax.make_parser().parse(BytesIO(ElementTree.tostring(root)))
        except (IOError, OSError, ElementTree.ParseError,
                ElementInclude.FatalIncludeError, xml.sax.SAXException):
            print("*EE* Could not parse XML file: {}".format(cnfg))
            return 1

        found = False
        platforms = []
        for platf in root.findall('Platform'):
            name = platf.get('name')
            if name is not None:
                platforms.append(name)
            if name != platform:
                continue
            for param in platf.findall('Parameter'):
                if param.get('name') != "INI_DATA_DIR":
                    continue
                value = param.find('Value')
                if value is not None and value.text:
                    datadir = value.text
                    found = True
                    break

        if not found:
            print("*EE* parameter 'INI_DATA_DIR' for platform {} not found in {}".format(platform, cnfg))
            print("\nAvailable platforms:\n\t" + "\n\t".join(platforms))
            return 1

    # -- work
    print("*II* Checking data sets in {}".format(datadir))
    print("*II* using the checksums from {}".format(sumdir))
    if not verb:
        print("*II* Only errors are reported")

    if not os.path.exists(datadir):
        print("*EE* INI DATA DIR '{}' does not exist".format(datadir))
        return 1

    for fname in glob.glob(os.path.join(sumdir, "*.md5")):
        print("\nChecking {}".format(fname))
        checksumfile(fname, datadir, verbose=verb)


def main():
    """Parse the command line and run the check; return the exit status."""
    # Use ../classic dir if exists as default (path relative to where the
    # script is called!)
    cwd = os.getcwd()
    classic = os.path.join(os.path.dirname(cwd), 'classic')
    if not os.path.exists(classic):
        classic = None

    # options
    parser = argparse.ArgumentParser(
        description=doc,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-v", "--verbose",
                        help="print OK for each successfully verified file",
                        action="store_true")
    parser.add_argument("-r", "--rundir",
                        help="run dir (takes precedence over xml)",
                        metavar='DIR')
    parser.add_argument("-p", "--platform",
                        help="platform to use when reading the config ",
                        metavar='PLATFORM')
    parser.add_argument("-c", "--classic",
                        help="runtime/classic dir (default: ../classic)",
                        metavar='CLASSIC', default=classic)
    parser.add_argument("-x", "--xml",
                        help="config file used (default: config-run.xml)",
                        default='config-run.xml')

    args = parser.parse_args()

    return check_ini_data_dir(verb=args.verbose,
                              rdir=args.rundir,
                              platform=args.platform,
                              classic=args.classic,
                              config=args.xml)


if __name__ == "__main__":
    sys.exit(main())