|
- #! /usr/bin/env python
- """
- TTB compare tools.
- """
- # ***
def MyLogger(name):
    """
    Return the logger called *name*, giving it defaults on first use.

    A logger without any handler gets a stream handler that writes to
    standard output; a logger whose level was never set is set to INFO.
    A logger that was already configured (by an earlier call or by the
    application) is returned untouched, so repeated calls do not stack
    handlers.
    """
    # external:
    import logging
    import sys

    logger = logging.getLogger(name)
    # no handlers yet ? then print to standard output:
    if not logger.handlers:
        logger.addHandler(logging.StreamHandler(sys.stdout))
    # no level set yet ? then set for info and higher:
    if logger.level == logging.NOTSET:
        logger.setLevel(logging.INFO)
    return logger
- # ***
def _is_empty(field):
    """Return True when *field* is an empty list or holds zero elements."""
    import numpy
    if isinstance(field, list):
        return not field
    # numpy.size also accepts plain Python scalars (size 1), which have
    # no ".size" attribute of their own.
    return numpy.size(field) == 0


def _is_element_wise(field):
    """Return True when *field* must be compared element by element
    (a list, or a sequence whose first element is a numpy string)."""
    import numpy
    if isinstance(field, list):
        return True
    try:
        first = field[0]
    except (IndexError, TypeError):
        # e.g. a 0-d array: not a sequence of strings
        return False
    # numpy.bytes_ is the portable spelling of the old numpy.string_
    return isinstance(first, (numpy.bytes_, numpy.str_))


def _first_mismatch(seq1, seq2):
    """Return the 0-based index of the first differing element of two
    sequences, or None when they are equal.  When one sequence is a strict
    prefix of the other, the index just past the shorter one is returned."""
    for index, (item1, item2) in enumerate(zip(seq1, seq2)):
        if item1 != item2:
            return index
    if len(seq1) != len(seq2):
        return min(len(seq1), len(seq2))
    return None


def _arrays_differ(field1, field2, vname, logger, verbose):
    """Compare two numeric arrays; log details when verbose > 1 and
    return True when they differ (NaN in either field counts as a difference)."""
    import numpy

    if numpy.any(numpy.isnan(field1)):
        if verbose > 1:
            logger.info(' found NaN in first field ...')
        return True
    if numpy.any(numpy.isnan(field2)):
        if verbose > 1:
            logger.info(' found NaN in second field ...')
        return True

    # do not let broadcasting hide (or crash on) a shape mismatch:
    shape1, shape2 = numpy.shape(field1), numpy.shape(field2)
    if shape1 != shape2:
        if verbose > 1:
            logger.info(' shapes differ in %s : %s versus %s', vname, shape1, shape2)
        return True

    # difference field:
    dd = abs(field2 - field1)
    if not (dd.max() > 0.0):
        return False

    # tuple with location of maximum difference:
    iimax0 = numpy.unravel_index(dd.argmax(), dd.shape)
    # convert to Fortran default indexing (start at 1, fastest index first):
    iimax = [int(i) + 1 for i in reversed(iimax0)]

    # relative difference, only defined for a non-zero reference value:
    sfrac = ''
    if field1[iimax0] != 0.0:
        rdif = dd[iimax0] * 100. / field1[iimax0]
        sfrac = '(rel.diff. %f %%)' % rdif

    if verbose > 1:
        try:
            nbad = numpy.count_nonzero(dd > 0.0)
        except MemoryError:
            logger.info(' Not enough memory to determine number of different datapoint')
        else:
            logger.info(' %s (out of %s) differences found in %s', nbad, dd.size, vname)
        logger.info(' max diff. in : %s %s', str(iimax), sfrac)
        logger.info(' where field #1 = %s', repr(field1[iimax0]))
        logger.info(' where field #2 = %s', repr(field2[iimax0]))
    return True


def df_files(fname1, fname2, sample=None, verbose=3):
    """
    Compare 2 hdf/netcdf files.

    Every variable present in both files is read through the "mdf" module
    and compared; diagnostics are sent to the 'ttb' logger.
    Variables that exist in only one of the files are reported (verbose > 1)
    but are not counted as a difference.
    An empty field versus a non-empty field, lists of different length,
    arrays of different shape and NaN values are counted as differences.

    Arguments:
      fname1, fname2 : names of the files to compare
      sample         : passed on to mdf.get_var to select a record
      verbose        : verbosity level:
        1: never, i.e. nothing is printed (i.e. error returned/raised if differences are found)
        2: on error, i.e. print info about differences only
        3: always, i.e. print general info even if identical files (DEFAULT)

    Returns None when no differences were found.
    When differences are found, 1 is returned if this module runs as the
    main script, otherwise ValueError is raised.
    Raises Exception when one of the files does not exist.
    """

    # external:
    import os
    import numpy
    # tools:
    import mdf
    # get logger:
    logger = MyLogger('ttb')

    # info, and check existence *before* trying to read anything:
    if verbose == 3:
        logger.info('compare files: ')
    for fname in (fname1, fname2):
        if verbose == 3:
            if sample is not None:
                logger.info(' %s (sample %i)', fname, sample)
            else:
                logger.info(' %s', fname)
        if not os.path.exists(fname):
            logger.error(' file not found ...')
            raise Exception('file not found: %s' % fname)

    # variables:
    vnames1 = mdf.get_varnames(fname1)
    vnames2 = mdf.get_varnames(fname2)
    # merge, keeping first-seen order and dropping duplicates:
    vnames = []
    for group in (vnames1, vnames2):
        for vname in group:
            if vname not in vnames:
                vnames.append(vname)

    # loop over variables:
    ok = True
    for vname in vnames:
        if verbose == 3:
            logger.info(' check %s ...', vname)
        # only variables present in both files can be compared:
        if vname not in vnames1:
            if verbose > 1:
                logger.info(' variable "%s" not in file 1 ...', vname)
            continue
        if vname not in vnames2:
            if verbose > 1:
                logger.info(' variable "%s" not in file 2 ...', vname)
            continue

        # read fields:
        field1 = mdf.get_var(fname1, vname, sample=sample)
        field2 = mdf.get_var(fname2, vname, sample=sample)

        # deal with degenerated cases (empty list or array):
        empty1, empty2 = _is_empty(field1), _is_empty(field2)
        if empty1 or empty2:
            if empty1 != empty2:
                if verbose > 1:
                    logger.info(' variable "%s" empty in file %i ...', vname, 1 if empty1 else 2)
                # empty versus non-empty is a real difference
                ok = False
            continue

        # scalar, list or array ?
        if numpy.isscalar(field1):
            differ = bool(field1 != field2)
            if differ and verbose > 1:
                logger.info(' difference found in %s', vname)
                logger.info(' where field #1 = %s', repr(field1))
                logger.info(' where field #2 = %s', repr(field2))
        elif _is_element_wise(field1):
            index = _first_mismatch(field1, field2)
            differ = index is not None
            if differ and verbose > 1:
                logger.info('%s', field1)
                logger.info('%s', field2)
                logger.info(' found differences in %s ; first diff. in element %s', vname, str(index + 1))
        else:
            differ = _arrays_differ(field1, field2, vname, logger, verbose)

        # reset flag:
        if differ:
            ok = False

    if ok:
        if verbose == 3:
            logger.info(' ok')
        return None

    # as a script the result becomes the exit status, as a library call
    # the caller is notified through an exception:
    if __name__ == "__main__":
        return 1
    raise ValueError('differences found between "%s" and "%s"' % (fname1, fname2))
-
- #////////////////////////////////////////////////////////
if __name__ == "__main__":
    # Command line interface: compare the two files given as arguments and
    # use the result of df_files as exit status (0: identical, 1: different).
    from optparse import OptionParser
    import sys

    parser = OptionParser(usage='%prog [--sample=sample] [--verbose=1|2|3] file_1 file_2')

    parser.add_option("-s", "--sample", type="int", dest="sample",
                      help="define record number to use, if there is several with the same name in HDF")
    # note: plain "$?" here; the former "\$" was an invalid escape sequence
    parser.add_option("-v", "--verbose", type="int", dest="verbose", default=3,
                      help="define level of verbosity, 1: never (check return code $?), 2: only if differences are found, 3: always (default)")
    options, args = parser.parse_args()

    if len(args) != 2:
        print("Must pass TWO file names to compare!")
        sys.exit(1)

    # df_files returns None (exit status 0) or 1 when run as main script:
    sys.exit(df_files(args[0], args[1], sample=options.sample, verbose=options.verbose))
-
|