ttb_compare.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. #! /usr/bin/env python
  2. """
  3. TTB compare tools.
  4. """
  5. # ***
  6. def MyLogger( name ) :
  7. # external:
  8. import logging
  9. import sys
  10. # setup logger:
  11. logger = logging.getLogger( name )
  12. # no handlers yet ? then print to standard output:
  13. if len(logger.handlers) == 0 : logger.addHandler(logging.StreamHandler(sys.stdout))
  14. # no level set yet ? then set for info and higher:
  15. if logger.level == 0 : logger.setLevel(logging.INFO)
  16. # ok:
  17. return logger
  18. #enddef
  19. # ***
  20. def df_files( fname1, fname2, sample=None, verbose=3 ) :
  21. """
  22. Compare 2 hdf/netcdf files.
  23. Print diagnostics to logger system.
  24. Verbose defines a verbosity level:
  25. 1: never, i.e. nothing is printed (i.e. error returned/raised if differences are found)
  26. 2: on error, i.e. print info about differences only
  27. 3: always, i.e. print general info even if identical files (DEFAULT)
  28. """
  29. # external:
  30. import os
  31. import numpy
  32. # tools:
  33. import mdf
  34. # get logger:
  35. logger = MyLogger('ttb')
  36. # variables:
  37. vnames1 = mdf.get_varnames( fname1 )
  38. vnames2 = mdf.get_varnames( fname2 )
  39. # merge:
  40. vnames = []
  41. for vname1 in vnames1 :
  42. if vname1 not in vnames : vnames.append(vname1)
  43. for vname2 in vnames2 :
  44. if vname2 not in vnames : vnames.append(vname2)
  45. #endfor
  46. # info ...
  47. if verbose == 3: logger.info( 'compare files: ' )
  48. for f in [fname1,fname2] :
  49. if verbose == 3:
  50. if sample != None :
  51. logger.info( ' %s (sample %i)' % (f,sample) )
  52. else :
  53. logger.info( ' %s' % f )
  54. if not os.path.exists(f) :
  55. logger.error( ' file not found ...' )
  56. raise Exception
  57. # loop over variables:
  58. ok = True
  59. for vname in vnames :
  60. ## info ...
  61. if verbose == 3:logger.info( ' check %s ...' % vname )
  62. # check ...
  63. if vname not in vnames1 :
  64. if verbose > 1 : logger.info( ' variable "%s" not in file 1 ...' % vname )
  65. continue
  66. if vname not in vnames2 :
  67. if verbose > 1 : logger.info( ' variable "%s" not in file 2 ...' % vname )
  68. continue
  69. # read fields:
  70. field1 = mdf.get_var( fname1, vname, sample=sample )
  71. field2 = mdf.get_var( fname2, vname, sample=sample )
  72. # deal with degenerated cases (empty list or array)
  73. if (type(field1) == list):
  74. if not field1:
  75. if not field2:
  76. differ = False
  77. else:
  78. if verbose > 1 : logger.info( ' variable "%s" empty in file 1 ...' % vname )
  79. differ = True
  80. continue
  81. if not field2:
  82. if verbose > 1 : logger.info( ' variable "%s" empty in file 2 ...' % vname )
  83. differ = True
  84. continue
  85. else:
  86. if field1.size == 0:
  87. if field2.size == 0:
  88. differ = False
  89. else:
  90. if verbose > 1 : logger.info( ' variable "%s" empty in file 1 ...' % vname )
  91. differ = True
  92. continue
  93. if field2.size == 0:
  94. if verbose > 1 : logger.info( ' variable "%s" empty in file 2 ...' % vname )
  95. differ = True
  96. continue
  97. # scalar, list or array ?
  98. if numpy.isscalar(field1):
  99. if field1 != field2:
  100. differ = True
  101. if verbose > 1 :
  102. logger.info( ' difference found in %s' % vname )
  103. logger.info( ' where field #1 = %s' % repr(field1) )
  104. logger.info( ' where field #2 = %s' % repr(field2) )
  105. else:
  106. differ = False
  107. elif (type(field1) == list) or (type(field1[0]) == numpy.string_) :
  108. # loop over elements:
  109. for i in range(min(len(field1),len(field2))) :
  110. differ = field1[i] != field2[i]
  111. if differ :
  112. print field1
  113. print field2
  114. break
  115. # different ?
  116. if differ :
  117. # info ..
  118. if verbose > 1 : logger.info( ' found differences in %s ; first diff. in element %s' % (vname,str(i+1)) )
  119. elif numpy.any( numpy.isnan(field1) ) :
  120. # info ...
  121. if verbose > 1 : logger.info( ' found NaN in first field ...' )
  122. # set flag:
  123. differ = True
  124. elif numpy.any( numpy.isnan(field2) ) :
  125. # info ...
  126. if verbose > 1 : logger.info( ' found NaN in second field ...' )
  127. # set flag:
  128. differ = True
  129. else :
  130. # difference field:
  131. dd = abs(field2-field1)
  132. # test:
  133. differ = dd.max() > 0.0
  134. # different ?
  135. if differ :
  136. # tupple with location of maximum difference:
  137. iimax0 = numpy.unravel_index( dd.argmax(), dd.shape )
  138. # convert to Fortran default indexing (ie start at 1)
  139. iimax = []
  140. for i in iimax0 : iimax.append(i+1)
  141. iimax.reverse()
  142. # relative difference:
  143. sfrac=''
  144. if field1[iimax0] != 0.0 :
  145. rdif = dd[iimax0]*100. / field1[iimax0]
  146. sfrac = '(rel.diff. %f %%)' % rdif
  147. # info ...
  148. if verbose > 1 :
  149. try:
  150. nbad = (numpy.nonzero( dd > 0.0 ))[0].size
  151. except MemoryError:
  152. logger.info(' Not enough memory to determine number of different datapoint')
  153. else:
  154. logger.info( ' %s (out of %s) differences found in %s' % (nbad, dd.size,vname) )
  155. logger.info( ' max diff. in : %s %s' % (str(iimax),sfrac) )
  156. logger.info( ' where field #1 = %s' % repr(field1[iimax0]) )
  157. logger.info( ' where field #2 = %s' % repr(field2[iimax0]) )
  158. # reset flag:
  159. if differ : ok = False
  160. # info ...
  161. if ok :
  162. if verbose==3: logger.info( ' ok' )
  163. else :
  164. if __name__ == "__main__" : return 1
  165. raise ValueError
  166. return
  167. #////////////////////////////////////////////////////////
  168. if __name__ == "__main__":
  169. from optparse import OptionParser
  170. import sys
  171. parser = OptionParser(usage='%prog [--sample=sample] [--verbose=1|2|3] file_1 file_2')
  172. parser.add_option("-s", "--sample", type="int", dest="sample",
  173. help="define record number to use, if there is several with the same name in HDF")
  174. parser.add_option("-v", "--verbose", type="int", dest="verbose", default=3,
  175. help="define level of verbosity, 1: never (check return code \$?), 2: only if differences are found, 3: always (default)")
  176. options, args = parser.parse_args()
  177. if len(args) != 2 :
  178. print "Must pass TWO file names to compare!"
  179. sys.exit(1)
  180. sys.exit( df_files( args[0], args[1], sample=options.sample, verbose=options.verbose) )