icb_pp.py
  1. from netCDF4 import Dataset
  2. from argparse import ArgumentParser
  3. import numpy as np
  4. import sys
  5. #
  6. # Basic iceberg trajectory post-processing python script.
  7. # This script collates iceberg trajectories from the distributed datasets written
  8. # out by each processing region and rearranges the ragged arrays into contiguous
  9. # streams for each unique iceberg. The output arrays are 2D (ntraj, ntimes) arrays.
  10. # Note that some icebergs may only exist for a subset of the possible times. In these
  11. # cases the missing instances are filled with invalid (NaN) values.
  12. #
  13. parser = ArgumentParser(description='produce collated trajectory file from distributed output\
  14. files, e.g. \n python ./icb_pp.py \
  15. -t trajectory_icebergs_004248_ -n 296 -o trajsout.nc' )
  16. parser.add_argument('-t',dest='froot',help='fileroot_of_distrbuted_data; root name of \
  17. distributed trajectory output (usually completed with XXXX.nc, where \
  18. XXXX is the 4 digit processor number)',
  19. default='trajectory_icebergs_004248_')
  20. parser.add_argument('-n',dest='fnum',help='number of distributed files to process',
  21. type=int, default=None)
  22. parser.add_argument('-o',dest='fout',help='collated_output_file; file name to receive the \
  23. collated trajectory data', default='trajsout.nc')
  24. args = parser.parse_args()
  25. default_used = 0
  26. if args.froot is None:
  27. pathstart = 'trajectory_icebergs_004248_'
  28. default_used = 1
  29. else:
  30. pathstart = args.froot
  31. if args.fnum is None:
  32. procnum = 0
  33. default_used = 1
  34. else:
  35. procnum = args.fnum
  36. if args.fout is None:
  37. pathout = 'trajsout.nc'
  38. default_used = 1
  39. else:
  40. pathout = args.fout
  41. if default_used == 1:
  42. print('At least one default value will be used; command executing is:')
  43. print('icb_pp.py -t ',pathstart,' -n ',procnum,' -o ',pathout)
  44. if procnum < 1:
  45. print('Need some files to collate! procnum = ',procnum)
  46. sys.exit(11)
  47. icu = []
  48. times = []
  49. #
  50. # Loop through all distributed datasets to obtain the complete list
  51. # of iceberg identification numbers and timesteps
  52. #
  53. for n in range(procnum):
  54. nn = '%4.4d' % n
  55. fw = Dataset(pathstart+nn+'.nc')
  56. if len(fw.dimensions['n']) > 0:
  57. print pathstart+nn+'.nc'
  58. ic = fw.variables['iceberg_number'][:,0]
  59. ts = fw.variables['timestep'][:]
  60. icv = np.unique(ic)
  61. ts = np.unique(ts)
  62. print('Min Max ts: ',ts.min(), ts.max())
  63. print('Number unique icebergs= ',icv.shape[0])
  64. icu.append(icv)
  65. times.append(ts)
  66. fw.close()
  67. #
  68. # Now flatten the lists and reduce to the unique spanning set
  69. #
  70. icu = np.concatenate(icu)
  71. icu = np.unique(icu)
  72. times = np.concatenate(times)
  73. times = np.unique(times)
  74. ntraj = icu.shape[0]
  75. print(ntraj, ' unique icebergs found across all datasets')
  76. print('Icebergs ids range from: ',icu.min(), 'to: ',icu.max())
  77. print('times range from: ',times.min(), 'to: ', times.max())
  78. #
  79. # Declare 2-D arrays to receive the data from all files
  80. #
  81. nt = times.shape[0]
  82. lons = np.zeros((ntraj, nt))
  83. lats = np.zeros((ntraj, nt))
  84. tims = np.zeros((ntraj, nt))
  85. xis = np.zeros((ntraj, nt))
  86. yjs = np.zeros((ntraj, nt))
  87. #
  88. # initially fill with invalid data
  89. #
  90. lons.fill(np.nan)
  91. lats.fill(np.nan)
  92. xis.fill(np.nan)
  93. yjs.fill(np.nan)
  94. tims.fill(np.nan)
  95. #
  96. # loop through distributed datasets again, this time
  97. # checking indices against icu and times lists and
  98. # inserting data into the correct locations in the
  99. # 2-D collated sets.
  100. #
  101. for n in range(procnum):
  102. nn = '%4.4d' % n
  103. fw = Dataset(pathstart+nn+'.nc')
  104. # Note many distributed datafiles will contain no iceberg data
  105. # so skip quickly over these
  106. m = len(fw.dimensions['n'])
  107. if m > 0:
  108. inx = np.zeros(m, dtype=int)
  109. tsx = np.zeros(m, dtype=int)
  110. print pathstart+nn+'.nc'
  111. ic = fw.variables['iceberg_number'][:,0]
  112. ts = fw.variables['timestep'][:]
  113. lns = fw.variables['lon'][:]
  114. lts = fw.variables['lat'][:]
  115. xxs = fw.variables['xi'][:]
  116. yys = fw.variables['yj'][:]
  117. for k in range(m):
  118. inxx = np.where(icu == ic[k])
  119. inx[k] = inxx[0]
  120. for k in range(m):
  121. inxx = np.where(times == ts[k])
  122. tsx[k] = inxx[0]
  123. lons[inx[:],tsx[:]] = lns[:]
  124. lats[inx[:],tsx[:]] = lts[:]
  125. tims[inx[:],tsx[:]] = ts[:]
  126. xis[inx[:],tsx[:]] = xxs[:]
  127. yjs[inx[:],tsx[:]] = yys[:]
  128. fw.close()
  129. # Finally create the output file and write out the collated sets
  130. #
  131. fo = Dataset(pathout, 'w', format='NETCDF4')
  132. ntrj = fo.createDimension('ntraj', ntraj)
  133. nti = fo.createDimension('ntime', None)
  134. olon = fo.createVariable('lon', 'f4',('ntraj','ntime'))
  135. olat = fo.createVariable('lat', 'f4',('ntraj','ntime'))
  136. otim = fo.createVariable('ttim', 'f4',('ntraj','ntime'))
  137. oxis = fo.createVariable('xis', 'f4',('ntraj','ntime'))
  138. oyjs = fo.createVariable('yjs', 'f4',('ntraj','ntime'))
  139. icbn = fo.createVariable('icbn', 'f4',('ntraj'))
  140. olon[:,:] = lons
  141. olat[:,:] = lats
  142. otim[:,:] = tims
  143. oxis[:,:] = xis
  144. oyjs[:,:] = yjs
  145. icbn[:] = icu
  146. fo.close()