#
# quick access to hdf files
#

"""
Quick access to data in HDF file.
"""


# ***


def const_name( i ) :

    """
    Translate a pyhdf data type constant into a readable name.

    The constants 'SDC.CHAR', 'SDC.INT32' and 'SDC.FLOAT32' of 'pyhdf.SD'
    are returned as 'char', 'int32' and 'float32' respectively;
    any other value is returned as 'str(i)'.
    """

    # external
    from pyhdf.SD import SDC

    # known type codes with their labels, tested in this order:
    labels = ( ( SDC.CHAR   , 'char'    ),
               ( SDC.INT32  , 'int32'   ),
               ( SDC.FLOAT32, 'float32' ) )

    # first code that equals the argument decides:
    for code,label in labels :
        if i == code : return label
    #endfor

    # unknown code, show the value itself:
    return str(i)

#enddef


# ***


def show( fname ) :

    """
    Print a listing of the content of an hdf file to standard output.

    For every data set a line is printed with the type name
    (see 'const_name'), the data set name, the dimension names and
    the dimension lengths, followed by the attributes of the data set.
    The global attributes of the file are listed at the end.

    Arguments:
        fname  : path of the hdf file; it is opened read-only.

    The file is closed again, also if reading from it fails.
    """

    # external
    import os
    import pyhdf.SD

    # header:
    print( '%s {' % os.path.basename(fname) )

    # open:
    hdid = pyhdf.SD.SD( fname, pyhdf.SD.SDC.READ )

    # ensure that the file is closed, whatever happens while reading:
    try :

        # number of data sets and attributes:
        ndset,nattr = hdid.info()

        # display ...
        print( 'variables:' )

        # loop over data sets:
        for idset in range(ndset) :
            # select variable:
            varid = hdid.select(idset)
            # variable info:
            vname,irank,ishape,itype,inatt = varid.info()
            # collect names and lengths of the dimensions:
            dimnames = []
            dimlens = []
            for qi in range(irank) :
                # current dimension id:
                dimid = varid.dim(qi)
                # extract:
                dimname,dimlen,dimscale,dimtype = dimid.info()
                # the length from info() might be 'unlimited';
                # prefer what length() gives, but this call sometimes fails,
                # then keep the value obtained from info():
                try :
                    dimlen = dimid.length()
                except Exception :
                    pass
                #endtry
                # store:
                dimnames.append( dimname )
                dimlens.append( str(dimlen) )
            #endfor
            # header : type variablename( dimensions ) :  // (lengths)
            print( '  %s %s( %s ) :  // (%s)' % (const_name(itype),vname,', '.join(dimnames),','.join(dimlens)) )
            # attributes:
            for key,value in varid.attributes().items() :
                print( '     %s  =  %s  ;' % (key,value) )
            #endfor
        #endfor

        # display global attributes:
        print( '' )
        print( '// global attributes:' )
        for key,value in hdid.attributes().items() :
            print( '     %s  =  %s  ;' % (key,value) )
        #endfor

    finally :

        # close:
        hdid.end()

    #endtry

    # end:
    print( '}' )

    # ok
    return

#enddef


# ***


def get_varnames( fname ) :

    """
    Return the names of the data sets found in an hdf file.

    Arguments:
        fname  : path of the hdf file; it is opened read-only.

    Return value:
        List with the name of every data set, in the order in which
        the data sets are stored; a name is repeated if several
        data sets share it.
    """

    # external
    import pyhdf.SD

    # open:
    hdid = pyhdf.SD.SD( fname, pyhdf.SD.SDC.READ )

    # ensure that the file is closed, whatever happens while reading:
    try :
        # number of data sets and attributes:
        ndset,nattr = hdid.info()
        # the name is the first element of the info of a data set:
        varnames = [ hdid.select(idset).info()[0] for idset in range(ndset) ]
    finally :
        # close:
        hdid.end()
    #endtry

    # ok
    return varnames

#enddef


# ***


def get_var( fname, vname, sample=None ) :

    """
    Read data set 'vname' from the specified hdf file.

    Arguments:
        fname   : path of the hdf file; it is opened read-only;
        vname   : name of the data set(s) to be read;
        sample  : optional number (first is 1) of the data set to be
                  returned if several data sets are named 'vname'.

    Return value:
        Without 'sample' : the data of the matching data set; if multiple
        data sets with this name exist their data is stacked along
        a new first dimension.
        With 'sample' : the data of the requested occurrence; if the name
        occurs only once, that data set is returned whatever the
        value of 'sample'.
        Data sets of character type are read with 'get_char',
        all others with the 'get' method of the data set.

    Raises 'Exception' if no data set named 'vname' is found, or if
    'sample' exceeds the number of data sets with this name.
    """

    # external
    import pyhdf.SD
    import numpy
    import warnings

    # number of matching data sets encountered:
    nfound = 0
    # matching fields collected if no sample is requested:
    fields = []
    # init result:
    data = None

    # suppress all warnings issued while reading;
    # the previous warning settings are restored on leave:
    with warnings.catch_warnings() :
        warnings.simplefilter("ignore")

        # open:
        hdid = pyhdf.SD.SD( fname, pyhdf.SD.SDC.READ )

        # ensure that the file is closed, whatever happens while reading:
        try :

            # number of data sets and attributes:
            ndset,nattr = hdid.info()

            # loop over data sets:
            for idset in range(ndset) :
                # select variable:
                varid = hdid.select(idset)
                # variable info:
                dname,drank,dshape,dtype,dnatt = varid.info()
                # only target variable ...
                if dname != vname : continue
                # increase counter:
                nfound = nfound + 1
                # if a sample is requested, only the first match (fallback
                # if the name occurs just once) and the requested match
                # need to be read:
                if (sample is not None) and (nfound != 1) and (nfound != sample) : continue
                # extract data:
                if dtype == pyhdf.SD.SDC.CHAR :
                    field = get_char( varid, dshape )
                else :
                    field = varid.get()
                #endif
                # store:
                if sample is None :
                    # collect, stacked below:
                    fields.append( field )
                else :
                    # copy result:
                    data = field
                    # leave only if this is the requested one; otherwise
                    # continue to search (and to count the matches):
                    if nfound == sample : break
                #endif
            #endfor

        finally :

            # close:
            hdid.end()

        #endtry

    #endwith

    # combine collected fields:
    if sample is None :
        if len(fields) == 1 :
            # single match, return as it is:
            data = fields[0]
        elif len(fields) > 1 :
            # add a leading dimension to each field and stack along it:
            data = numpy.vstack( [ field.reshape( (1,)+field.shape ) for field in fields ] )
        #endif
    #endif

    # check ...
    # (identity test: '==' on a numpy array compares element by element)
    if data is None :
        print( 'ERROR - variable(s) "%s" not found' % vname )
        print( 'ERROR - in file :  %s' % (fname,) )
        raise Exception( 'variable(s) "%s" not found in file : %s' % (vname,fname) )
    #endif
    if (sample is not None) and (nfound > 1) and (sample > nfound) :
        print( 'ERROR - sample %i of variable "%s" not found' % (sample,vname) )
        print( 'ERROR - number of samples found : %i' % nfound )
        raise Exception( 'sample %i of variable "%s" not found, number of samples found : %i' % (sample,vname,nfound) )
    #endif

    # ok
    return data

#enddef


# ***


def get_char( varid, shp ) :

    """
    Read a character data set as string(s), one character per read call.

    Arguments:
        varid  : data set object providing a method 'get(start=..,count=..)'
                 that returns a (nested) sequence for the requested block;
        shp    : dimension lengths of the data set; the last dimension
                 counts the characters of a string. A plain number is
                 accepted for a data set with a single dimension
                 (presumably how pyhdf reports such a shape - TODO confirm).

    Return value, depending on the number of dimensions:
        1          : a single string, surrounding whitespace stripped;
        2          : a list of stripped strings;
        3 or more  : a numpy array of fixed-length strings (not stripped),
                     shaped as all dimensions but the last.

    Raises 'NotImplementedError' for a shape without dimensions.
    """

    # external:
    import numpy

    # shape as tuple; a plain number is taken as a single dimension:
    try :
        shape = tuple(shp)
    except TypeError :
        shape = (shp,)
    #endtry

    # number of dimensions:
    rank = len(shape)
    # at least the character dimension is needed:
    if rank == 0 :
        raise NotImplementedError( 'sorry, not implemented for shape : %s' % (shp,) )
    #endif

    # length of the strings:
    nchar = shape[-1]
    # read block of a single element:
    count = (1,)*rank

    def read_string( first ) :
        # Return the string stored at the leading indices 'first'.
        chars = []
        for i in range(nchar) :
            # can only get single value without errors ...
            c = varid.get( start=first+(i,), count=count )
            # the block is nested once per dimension, unpack the element:
            for idim in range(rank) : c = c[0]
            chars.append( c )
        #endfor
        return ''.join(chars)

    # single string (1D), list of strings (2D), or array of strings:
    if rank == 1 :
        values = read_string( () ).strip()
    elif rank == 2 :
        values = [ read_string( (j,) ).strip() for j in range(shape[0]) ]
    else :
        # init output:
        values = numpy.zeros( shape[:-1], 'S%i' % nchar )
        # loop over strings, last leading index running fastest:
        for index in numpy.ndindex( *shape[:-1] ) :
            values[index] = read_string( index )
        #endfor
    #endif

    # ok
    return values

#enddef


# ***


def get_attr( fname, aname ) :

    """
    Read global attribute 'aname' from the specified hdf file.

    Arguments:
        fname  : path of the hdf file; it is opened read-only;
        aname  : name of the global attribute.

    Return value:
        The result of 'get(aname)' on the global attributes of the file
        (presumably None if the attribute is not present, as for
        a dictionary - TODO confirm).
    """

    # external
    import pyhdf.SD

    # open:
    hdid = pyhdf.SD.SD( fname, pyhdf.SD.SDC.READ )

    # ensure that the file is closed, whatever happens while reading:
    try :
        # extract:
        data = hdid.attributes().get(aname)
    finally :
        # close:
        hdid.end()
    #endtry

    # ok
    return data

#enddef


# ***


def get_var_attr( fname, vname, aname ) :

    """
    Read attribute 'aname' of variable 'vname' from the specified hdf file.

    Arguments:
        fname  : path of the hdf file; it is opened read-only;
        vname  : name of the data set(s) holding the attribute;
        aname  : name of the attribute.

    Return value:
        The attribute value obtained with 'get(aname)' on the attributes
        of the data set. If several data sets are named 'vname', a value
        equal to the previously stored one is skipped; if more than
        one value remains, a list with these values is returned.

    Raises 'IOError' if no data set named 'vname' is found.
    The file is closed again, also if an error is raised.
    """

    # external
    import pyhdf.SD

    # values found, without repetition of the previous one:
    found = []

    # open:
    hdid = pyhdf.SD.SD( fname, pyhdf.SD.SDC.READ )

    # ensure that the file is closed, whatever happens while reading:
    try :

        # number of data sets and attributes:
        ndset,nattr = hdid.info()

        # loop over data sets:
        for idset in range(ndset) :
            # select variable:
            varid = hdid.select(idset)
            # variable info:
            dname,drank,dshape,dtype,dnatt = varid.info()
            # only target variable ...
            if dname != vname : continue
            # extract from attributes:
            values = varid.attributes().get(aname)
            # same as previously stored ? then skip:
            if (len(found) > 0) and (values == found[-1]) : continue
            # store:
            found.append( values )
        #endfor

    finally :

        # close:
        hdid.end()

    #endtry

    # check ...
    if len(found) == 0 :
        print( 'ERROR - variable not found :  %s' % (vname,) )
        print( 'ERROR - in file :  %s' % (fname,) )
        raise IOError( 'variable not found : %s (in file : %s)' % (vname,fname) )
    #endif

    # single value as it is, otherwise the list:
    if len(found) == 1 :
        data = found[0]
    else :
        data = found
    #endif

    # ok
    return data

#enddef