ece-data-check.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. #! /usr/bin/env python
  2. doc="""
  3. Verify the checksum of the data sets used for a run. The data location is
  4. retrieved from either a run directory (-r) or a config-run.xml file, which is
  5. parsed for a specific platform (-p).
  6. If you do not specify a run directory, a config-run.xml file is
  7. used to find the data. The following are tried:
  8. ../classic/config-run.xml (relative to where the script is called)
  9. ../classic/config-run.xml (relative to where the script is located)
  10. You can specify the path and/or the filename of the config file. And if you
  11. do not specify a platform to read it, a list of available platforms is
  12. printed.
  13. Examples:
  14. # uses a rundir
  15. ece-data-check.py -r /scratch/ms/nl/nm6/ECEARTH-RUNS/Z6Yd
  16. # uses a config file
  17. ece-data-check.py # print list of available platforms in default config-run.xml
  18. ece-data-check.py -p cca-intel # use cca-intel platform info from default config-run.xml
  19. """
  20. from xml.etree import ElementTree, ElementInclude
  21. import xml.sax
  22. from io import StringIO
  23. import hashlib
  24. import re
  25. import os, sys, glob
  26. import argparse
  27. def md5checksum(filepath):
  28. """Returns MD5 sum of one file"""
  29. with open(filepath, 'rb') as fh:
  30. m = hashlib.md5()
  31. while True:
  32. data = fh.read(8192)
  33. if not data:
  34. break
  35. m.update(data)
  36. return m.hexdigest()
  37. def checksumfile(md5file, dataloc, verbose=False):
  38. """Read MD5 sums from the md5file and check them."""
  39. reg=re.compile(r"([^#]{32}) +(.+)") # to parse md5 files
  40. # -- read md5sums
  41. checksum={}
  42. with open(md5file, 'r') as fobj:
  43. for line in fobj:
  44. ma = reg.match(line)
  45. if ma: checksum[ma.group(2)]=ma.group(1)
  46. # -- check the sums
  47. for k in checksum:
  48. qf = os.path.join(dataloc, k)
  49. if os.path.exists(qf):
  50. current = md5checksum( qf )
  51. if current == checksum[k]:
  52. if verbose: print k,": OK"
  53. else:
  54. print k,": DIFFER"
  55. else:
  56. print k,": MISSING"
  57. def check_ini_data_dir(verb=False, rdir=None, platform='unknown-platform',
  58. classic=None, config="config-run.xml"):
  59. """
  60. Find INI_DATA_DIR according to RUNDIR or config-run.xml. Verify the MD5
  61. sums read in the runtime/datacheck directory.
  62. """
  63. # -- Dir with the sums files
  64. sumdir = os.path.dirname(os.path.realpath(__file__))
  65. # -- Find INI DATA DIR from a link to it in the rundir
  66. if rdir:
  67. testfile = os.path.realpath(os.path.join(rdir,'cf_name_table.txt'))
  68. datadir = testfile.replace('/oasis/cf_name_table.txt','')
  69. # -- Find INI DATA DIR from classic/config-run.xml
  70. else:
  71. # default classic
  72. if not classic:
  73. classic = os.path.join(os.path.dirname(sumdir), 'classic')
  74. if not os.path.exists(classic):
  75. classic = os.path.join(os.path.dirname(sumdir), 'no-classic-runtime-here')
  76. cnfg = os.path.join(classic, config)
  77. os.chdir(os.path.dirname(cnfg))
  78. #cnfg = os.path.join(classic, config)
  79. # parse
  80. parser = xml.sax.make_parser()
  81. try:
  82. tree = ElementTree.parse(cnfg)
  83. except IOError:
  84. print "*EE* Could not open file:",cnfg
  85. return 1
  86. except ElementTree.ParseError, v:
  87. row, column = v.position
  88. print "*EE* error on row", row, "column", column, ":", v
  89. return 1
  90. except:
  91. print "*EE* Could not parse XML file:",cnfg
  92. return 1
  93. found = False
  94. platforms=[]
  95. root = tree.getroot()
  96. ElementInclude.include(root)
  97. s = unicode(ElementTree.tostring(root))
  98. io = StringIO(s)
  99. parser.parse(io)
  100. for platf in root.findall('Platform'):
  101. platforms.append( platf.get('name'))
  102. if platf.get('name') == platform:
  103. for param in platf.findall('Parameter'):
  104. if param.get('name') == "INI_DATA_DIR":
  105. datadir=param.find('Value').text
  106. found= True
  107. break
  108. if not found:
  109. print "*EE* parameter 'INI_DATA_DIR' for platform {} not found in {}".format(platform, cnfg)
  110. print "\nAvailable platforms:\n\t", "\n\t".join(platforms)
  111. return 1
  112. # -- work
  113. print "*II* Checking data sets in", datadir
  114. print "*II* using the checksums from", sumdir
  115. if not verb: print "*II* Only errors are reported"
  116. if not os.path.exists(datadir):
  117. print "*EE* INI DATA DIR '{}' does not exist".format(datadir)
  118. return 1
  119. for fname in glob.glob(os.path.join(sumdir,"*.md5")):
  120. print '\nChecking',fname
  121. checksumfile(fname, datadir, verbose=verb)
  122. if __name__ == "__main__":
  123. # Use ../classic dir if exists as default (path relative to where the script is called!)
  124. cwd = os.getcwd()
  125. classic = os.path.join(os.path.dirname(cwd), 'classic')
  126. if not os.path.exists(classic):
  127. classic = None
  128. # options
  129. parser = argparse.ArgumentParser(description=doc, formatter_class=argparse.RawTextHelpFormatter)
  130. parser.add_argument("-v", "--verbose", help="print OK for each successfully verified file", action="store_true")
  131. parser.add_argument("-r", "--rundir", help="run dir (takes precedence over xml)", metavar='DIR')
  132. parser.add_argument("-p", "--platform", help="platform to use when reading the config ", metavar='PLATFORM', )
  133. parser.add_argument("-c", "--classic", help="runtime/classic dir (default: ../classic)", metavar='CLASSIC', default=classic)
  134. parser.add_argument("-x", "--xml", help="config file used (default: config-run.xml)", default='config-run.xml')
  135. args=parser.parse_args()
  136. sys.exit( check_ini_data_dir(verb=args.verbose, rdir=args.rundir, platform=args.platform, classic=args.classic, config=args.xml) )