#! /usr/bin/env python

"""
TTB compare tools.
"""

# ***


def MyLogger( name ) :

    """
    Return the logger called 'name'.

    If the logger has no handlers yet, a handler writing to standard
    output is attached; if no level is set yet, the level is set to INFO.
    Calling this routine again with the same name returns the same logger
    without adding another handler.
    """

    # external:
    import logging
    import sys

    # setup logger:
    logger = logging.getLogger( name )
    # no handlers yet ? then print to standard output:
    if len(logger.handlers) == 0 :
        logger.addHandler( logging.StreamHandler(sys.stdout) )
    # no level set yet ? then set for info and higher:
    if logger.level == 0 :
        logger.setLevel( logging.INFO )

    # ok:
    return logger

#enddef


# ***


def df_fields( vname, field1, field2, verbose=3, logger=None ) :

    """
    Compare two fields read from variable 'vname'; return True if they differ.

    Arguments:
      vname    : variable name, only used in the messages
      field1   : value from the first file (scalar, list, or numpy array)
      field2   : value from the second file
      verbose  : verbosity level; diagnostics about differences are
                 written to the logger if verbose > 1
      logger   : logger to write to; defaults to MyLogger('ttb')

    The following situations are reported as a difference:
      * one of the fields is empty while the other is not
      * scalars that are not equal
      * lists or string arrays with a different element or different length
      * a NaN in either of the numerical arrays
      * numerical arrays of different shape or with non-zero difference
    """

    # external:
    import numpy

    # get logger:
    if logger is None : logger = MyLogger('ttb')

    # deal with degenerated cases (empty list or array);
    # 'numpy.size' is used instead of the '.size' attribute
    # since it also accepts plain python scalars:
    if isinstance(field1,list) :
        empty1 = len(field1) == 0
    else :
        empty1 = numpy.size(field1) == 0
    if isinstance(field2,list) :
        empty2 = len(field2) == 0
    else :
        empty2 = numpy.size(field2) == 0
    if empty1 and empty2 :
        return False
    if empty1 :
        if verbose > 1 : logger.info( ' variable "%s" empty in file 1 ...' % vname )
        return True
    if empty2 :
        if verbose > 1 : logger.info( ' variable "%s" empty in file 2 ...' % vname )
        return True

    # scalar ?
    if numpy.isscalar(field1) :
        differ = bool( field1 != field2 )
        if differ and (verbose > 1) :
            logger.info( ' difference found in %s' % vname )
            logger.info( ' where field #1 = %s' % repr(field1) )
            logger.info( ' where field #2 = %s' % repr(field2) )
        return differ

    # list or array of strings ?
    # ('numpy.string_' was removed in NumPy 2.0 ; 'numpy.bytes_' is the
    #  same type, 'numpy.str_' covers unicode arrays ; 0-d arrays cannot
    #  be indexed with [0] and are left for the numerical tests below)
    is_strings = isinstance(field1,list)
    if (not is_strings) and (numpy.ndim(field1) > 0) :
        is_strings = isinstance( field1[0], (numpy.bytes_,numpy.str_) )
    if is_strings :
        n1 = len(field1)
        n2 = len(field2)
        # loop over common elements:
        for i in range(min(n1,n2)) :
            if field1[i] != field2[i] :
                # info ...
                if verbose > 1 :
                    logger.info( ' found differences in %s ; first diff. in element %s' % (vname,str(i+1)) )
                    logger.info( str(field1) )
                    logger.info( str(field2) )
                return True
        # common part is the same, but one of them might be longer:
        if n1 != n2 :
            if verbose > 1 :
                logger.info( ' found differences in %s ; lengths are %i and %i' % (vname,n1,n2) )
            return True
        # same:
        return False

    # numerical arrays; NaN values are always reported as a difference:
    if numpy.any( numpy.isnan(field1) ) :
        if verbose > 1 : logger.info( ' found NaN in first field ...' )
        return True
    if numpy.any( numpy.isnan(field2) ) :
        if verbose > 1 : logger.info( ' found NaN in second field ...' )
        return True

    # different shapes cannot be compared element by element:
    shape1 = numpy.shape(field1)
    shape2 = numpy.shape(field2)
    if shape1 != shape2 :
        if verbose > 1 :
            logger.info( ' found differences in %s ; shapes are %s and %s' % (vname,str(shape1),str(shape2)) )
        return True

    # difference field:
    dd = abs(field2-field1)
    # same ?
    if not (dd.max() > 0.0) :
        return False

    # info ...
    if verbose > 1 :
        # tupple with location of maximum difference:
        iimax0 = numpy.unravel_index( dd.argmax(), dd.shape )
        # convert to Fortran default indexing (ie start at 1, first index fastest):
        iimax = [ i+1 for i in reversed(iimax0) ]
        # relative difference:
        sfrac = ''
        if field1[iimax0] != 0.0 :
            rdif = dd[iimax0]*100. / field1[iimax0]
            sfrac = '(rel.diff. %f %%)' % rdif
        # number of different values; 'dd' is non-negative and free of NaN here,
        # thus counting the non-zero elements is sufficient:
        try :
            nbad = numpy.count_nonzero( dd )
        except MemoryError :
            logger.info(' Not enough memory to determine number of different datapoint')
        else :
            logger.info( ' %s (out of %s) differences found in %s' % (nbad, dd.size,vname) )
        logger.info( ' max diff. in : %s %s' % (str(iimax),sfrac) )
        logger.info( ' where field #1 = %s' % repr(field1[iimax0]) )
        logger.info( ' where field #2 = %s' % repr(field2[iimax0]) )

    # different:
    return True

#enddef


# ***


def df_files( fname1, fname2, sample=None, verbose=3 ) :

    """
    Compare 2 hdf/netcdf files.
    Print diagnostics to logger system.

    Verbose defines a verbosity level:
      1: never, i.e. nothing is printed (i.e. error returned/raised if differences are found)
      2: on error, i.e. print info about differences only
      3: always, i.e. print general info even if identical files (DEFAULT)

    The optional 'sample' is passed to 'mdf.get_var' for every variable.

    Variables that are present in only one of the files are reported
    (if verbose > 1) but are not counted as a difference.

    Return value:
      None if no differences are found;
      1    if differences are found and this module is run as a script.

    Raises:
      IOError    if one of the files does not exist;
      ValueError if differences are found and this module is imported.
    """

    # external:
    import os

    # tools:
    import mdf

    # get logger:
    logger = MyLogger('ttb')

    # info, and check presence before trying to read from the files:
    if verbose == 3 : logger.info( 'compare files: ' )
    for f in [fname1,fname2] :
        if verbose == 3 :
            if sample is not None :
                logger.info( ' %s (sample %i)' % (f,sample) )
            else :
                logger.info( ' %s' % f )
        if not os.path.exists(f) :
            logger.error( ' file not found ...' )
            raise IOError( 'file not found : %s' % f )

    # variables:
    vnames1 = mdf.get_varnames( fname1 )
    vnames2 = mdf.get_varnames( fname2 )
    # merge, keep order of first appearance:
    vnames = []
    for vname in list(vnames1) + list(vnames2) :
        if vname not in vnames : vnames.append(vname)
    #endfor

    # loop over variables:
    ok = True
    for vname in vnames :
        # info ...
        if verbose == 3 : logger.info( ' check %s ...' % vname )
        # check ...
        if vname not in vnames1 :
            if verbose > 1 : logger.info( ' variable "%s" not in file 1 ...' % vname )
            continue
        if vname not in vnames2 :
            if verbose > 1 : logger.info( ' variable "%s" not in file 2 ...' % vname )
            continue
        # read fields:
        field1 = mdf.get_var( fname1, vname, sample=sample )
        field2 = mdf.get_var( fname2, vname, sample=sample )
        # compare, reset flag if different:
        if df_fields( vname, field1, field2, verbose=verbose, logger=logger ) :
            ok = False
    #endfor

    # info ...
    if ok :
        if verbose == 3 : logger.info( ' ok' )
        return

    # differences found; return a status for the shell if run
    # as a script, raise an error if called from other code:
    if __name__ == "__main__" :
        return 1
    raise ValueError( 'differences found between "%s" and "%s"' % (fname1,fname2) )

#enddef


#////////////////////////////////////////////////////////

if __name__ == "__main__":

    from optparse import OptionParser
    import sys

    parser = OptionParser(usage='%prog [--sample=sample] [--verbose=1|2|3] file_1 file_2')
    parser.add_option("-s", "--sample", type="int", dest="sample",
                      help="define record number to use, if there is several with the same name in HDF")
    parser.add_option("-v", "--verbose", type="int", dest="verbose", default=3,
                      help="define level of verbosity, 1: never (check return code $?), 2: only if differences are found, 3: always (default)")

    options, args = parser.parse_args()

    if len(args) != 2 :
        # single argument in parentheses: valid for both python 2 and 3
        print("Must pass TWO file names to compare!")
        sys.exit(1)

    # exit status is 0 if the files are the same, 1 otherwise:
    sys.exit( df_files( args[0], args[1], sample=options.sample, verbose=options.verbose) )