123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- #! /usr/bin/env python
- doc="""
- Verify the checksum of the data sets used for a run. The data location is
- retrieved from either a run directory (-r) or a config-run.xml file, which is
- parsed for a specific platform (-p).
- If you do not specify a run directory, a config-run.xml file is
- used to find the data. The following are tried:
- ../classic/config-run.xml (relative to where the script is called)
- ../classic/config-run.xml (relative to where the script is located)
- You can specify the path and/or the filename of the config file. And if you
- do not specify a platform to read it, a list of available platforms is
- printed.
- Examples:
- # uses a rundir
- ece-data-check.py -r /scratch/ms/nl/nm6/ECEARTH-RUNS/Z6Yd
- # uses a config file
- ece-data-check.py # print list of available platforms in default config-run.xml
- ece-data-check.py -p cca-intel # use cca-intel platform info from default config-run.xml
-
- """
- from xml.etree import ElementTree, ElementInclude
- import xml.sax
- from io import StringIO
- import hashlib
- import re
- import os, sys, glob
- import argparse
def md5checksum(filepath, blocksize=8192):
    """Return the hexadecimal MD5 digest of one file.

    The file is opened in binary mode and read ``blocksize`` bytes at a
    time, so it is never loaded into memory as a whole.

    Args:
        filepath: path of the file to hash.
        blocksize: number of bytes requested per read (default 8192, the
            value that used to be hard-coded). Must be positive.

    Returns:
        The digest as returned by ``hashlib``'s ``hexdigest()``.

    Raises:
        ValueError: if ``blocksize`` is not positive.
        IOError/OSError: if the file cannot be opened or read.
    """
    # read(0) returns b'' straight away, which would silently yield the
    # digest of an empty file; reject it instead.
    if blocksize <= 0:
        raise ValueError("blocksize must be a positive number of bytes")

    digest = hashlib.md5()
    with open(filepath, 'rb') as fh:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: fh.read(blocksize), b''):
            digest.update(chunk)
    return digest.hexdigest()
def checksumfile(md5file, dataloc, verbose=False):
    """Read MD5 sums from ``md5file`` and check them against ``dataloc``.

    Each line of the form ``<32 hex digits><blanks><file name>`` is taken
    as an entry; every other line (comments, blank lines, anything that
    does not start with a checksum) is ignored. The file name is joined to
    ``dataloc`` and the file is hashed with ``md5checksum``.

    One line is printed per problem (``<name> : MISSING`` or
    ``<name> : DIFFER``); with ``verbose`` a ``<name> : OK`` line is also
    printed for every file that verifies.

    Args:
        md5file: path of the text file listing the expected checksums.
        dataloc: directory the listed file names are relative to.
        verbose: also report the files that verify successfully.

    Returns:
        The number of listed files that are missing or differ (0 when
        everything verifies).
    """
    # A checksum is exactly 32 hex digits; requiring that keeps prose
    # lines from being mistaken for entries.
    entry = re.compile(r"([0-9a-fA-F]{32}) +(.+)")

    # -- read md5sums
    expected = {}
    with open(md5file, 'r') as fobj:
        for line in fobj:
            # Drop the line ending so a stray '\r' never ends up in the name.
            found = entry.match(line.rstrip("\r\n"))
            if found:
                # hexdigest() is lower case; normalise so that upper-case
                # listings still compare equal.
                expected[found.group(2)] = found.group(1).lower()

    # -- check the sums
    failures = 0
    for name, wanted in expected.items():
        target = os.path.join(dataloc, name)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        if not os.path.exists(target):
            print("{} : MISSING".format(name))
            failures += 1
        elif md5checksum(target) != wanted:
            print("{} : DIFFER".format(name))
            failures += 1
        elif verbose:
            print("{} : OK".format(name))
    return failures
def check_ini_data_dir(verb=False, rdir=None, platform='unknown-platform',
                       classic=None, config="config-run.xml"):
    """Find INI_DATA_DIR and verify the data sets it holds.

    The data directory comes from one of two places:

    * ``rdir`` given: the real path of ``<rdir>/cf_name_table.txt`` with
      the trailing ``/oasis/cf_name_table.txt`` removed;
    * otherwise: the ``Value`` of the ``INI_DATA_DIR`` ``Parameter`` of the
      ``Platform`` called ``platform`` in ``<classic>/<config>``.

    Every ``*.md5`` file found next to this script is then passed to
    ``checksumfile`` together with that directory.

    Side effect: when the config file is used, the working directory is
    changed to the directory holding it (presumably so that relative
    includes in the XML resolve -- confirm against the config files).

    Args:
        verb: also report files that verify successfully.
        rdir: run directory; takes precedence over the config file.
        platform: platform name to look up in the config file.
        classic: directory holding the config file; defaults to the
            ``classic`` directory next to this script's directory.
        config: name of the config file inside ``classic``.

    Returns:
        0 when every check ran without reporting a problem, 1 on any error
        (unreadable or malformed config, unknown platform, missing data
        directory) or when ``checksumfile`` reported failures.
    """
    # -- Dir with the sums files (the directory holding this script)
    sumdir = os.path.dirname(os.path.realpath(__file__))

    if rdir:
        # -- Find INI DATA DIR from a link to it in the rundir
        testfile = os.path.realpath(os.path.join(rdir, 'cf_name_table.txt'))
        datadir = testfile.replace('/oasis/cf_name_table.txt', '')
    else:
        # -- Find INI DATA DIR from classic/config-run.xml
        if not classic:
            classic = os.path.join(os.path.dirname(sumdir), 'classic')

        # Make the path absolute *before* changing directory, otherwise a
        # relative 'classic' would be resolved against the new cwd.
        cnfg = os.path.abspath(os.path.join(classic, config))
        cnfgdir = os.path.dirname(cnfg)
        # Only chdir when the directory exists; if it does not, parsing
        # below fails and the problem is reported cleanly instead of
        # surfacing as an unhandled OSError.
        if os.path.isdir(cnfgdir):
            os.chdir(cnfgdir)

        # parse
        try:
            tree = ElementTree.parse(cnfg)
        except IOError:
            print("*EE* Could not open file: {}".format(cnfg))
            return 1
        except ElementTree.ParseError as err:
            row, column = err.position
            print("*EE* error on row {} column {} : {}".format(row, column, err))
            return 1
        except Exception:
            print("*EE* Could not parse XML file: {}".format(cnfg))
            return 1

        root = tree.getroot()
        ElementInclude.include(root)
        # Re-parse the expanded document with SAX; this raises if the
        # result is not well-formed (looks like a sanity check on the
        # included content). tostring() yields data parseString accepts on
        # both Python 2 and 3, so no unicode() conversion is needed.
        xml.sax.parseString(ElementTree.tostring(root),
                            xml.sax.ContentHandler())

        found = False
        platforms = []
        for platf in root.findall('Platform'):
            name = platf.get('name')
            platforms.append(name)
            if name != platform:
                continue
            for param in platf.findall('Parameter'):
                if param.get('name') != "INI_DATA_DIR":
                    continue
                value = param.find('Value')
                # A missing or empty <Value> counts as "not found" rather
                # than crashing on None.
                if value is not None and value.text:
                    datadir = value.text
                    found = True
                break

        if not found:
            print("*EE* parameter 'INI_DATA_DIR' for platform {} not found in {}".format(platform, cnfg))
            # Unnamed platforms cannot be selected, so leave them out.
            print("\nAvailable platforms:\n\t" + "\n\t".join(p for p in platforms if p))
            return 1

    # -- work
    print("*II* Checking data sets in {}".format(datadir))
    print("*II* using the checksums from {}".format(sumdir))
    if not verb:
        print("*II* Only errors are reported")

    if not os.path.exists(datadir):
        print("*EE* INI DATA DIR '{}' does not exist".format(datadir))
        return 1

    failures = 0
    # Sorted so the report order does not depend on the file system.
    for fname in sorted(glob.glob(os.path.join(sumdir, "*.md5"))):
        print("\nChecking {}".format(fname))
        # 'or 0' tolerates a checksumfile() that returns nothing.
        failures += checksumfile(fname, datadir, verbose=verb) or 0

    # Non-zero exit status when any file was missing or differed.
    return 1 if failures else 0
if __name__ == "__main__":
    # Default runtime dir: a 'classic' directory that is a sibling of the
    # current working directory (i.e. relative to where the script is
    # called from), or None when there is no such directory.
    classic = os.path.join(os.path.dirname(os.getcwd()), 'classic')
    classic = classic if os.path.exists(classic) else None

    # Command line definition; the module-level 'doc' text is the help header.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=doc,
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="print OK for each successfully verified file",
    )
    parser.add_argument(
        "-r", "--rundir",
        metavar='DIR',
        help="run dir (takes precedence over xml)",
    )
    parser.add_argument(
        "-p", "--platform",
        metavar='PLATFORM',
        help="platform to use when reading the config ",
    )
    parser.add_argument(
        "-c", "--classic",
        metavar='CLASSIC',
        default=classic,
        help="runtime/classic dir (default: ../classic)",
    )
    parser.add_argument(
        "-x", "--xml",
        default='config-run.xml',
        help="config file used (default: config-run.xml)",
    )
    args = parser.parse_args()

    # The value returned by the check becomes the process exit status.
    status = check_ini_data_dir(
        verb=args.verbose,
        rdir=args.rundir,
        platform=args.platform,
        classic=args.classic,
        config=args.xml,
    )
    sys.exit(status)
|