generate_new_bdy_dta.py

from netCDF4 import Dataset
import sys
import time
import numpy as np
from collections import OrderedDict

# Script for generating NEMO BDY data files from pre-interpolated data, and a NEMO BDY geometry file.
# Author: C. Pelletier
# Inputs:
# 1) A NEMO BDY geometry description file (also needed by NEMO itself for a BDY-including run),
#    which can be generated with generate_new_bdy_geom.py.
# 2) A set of external data that has already been pre-interpolated to the full 2D NEMO grid you want to run at.
#    This is huge overkill in terms of data (you'll need <1% of the actual full data), but it really is simpler to implement.
#    It's OK for the data to be gibberish outside of the vicinity of the NEMO BDY (but do take a margin, e.g. keep real data on the 2*nbr slices closest to the BDY),
#    but the data should cover the full NEMO domain and should have been "drowned" (i.e. all potentially masked cells should have been filled up, e.g. with nearest-neighbors).
# Input data nomenclature
# filename: <indata_folder>/<pref_in><ftype>_<year>.nc
# where:
# a) <indata_folder>: input data folder, defined below;
# b) <pref_in>: input prefix, defined below;
# c) <ftype>: "filetype". I typically use grid_T, grid_U and grid_V. NEMO uses slightly staggered grids for thermodynamics ("grid_T"), x-direction dynamics ("grid_U"), and y-direction dynamics ("grid_V"). Your pre-interpolated input fields should have been interpolated to distinct grids, depending on their nature:
#    c.i)   T-grid for temperature, salinity, sea surface height, sea-ice concentration, sea-ice thickness, snow over sea-ice;
#    c.ii)  U-grid for x-direction ocean velocities (barotropic and baroclinic velocities, and potentially sea-ice U velocities);
#    c.iii) V-grid for y-direction ocean velocities (barotropic and baroclinic velocities, and potentially sea-ice V velocities).
# The <ftype> names themselves do not matter (they can be called anything), but they must be consistent with the ftypes dictionary defined (and described) below.
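# For example, with the settings below, the grid_T input for 1979 is expected at:
# /mfast/pelletie/oras5/preinterp/ORAS5_preinterp_opa1_grid_T_1979.nc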
# Input data netCDF content:
# DIMENSIONS: must be time_counter, depth<grid>, y, x,
#             where <grid> is t/u/v, depending on the grid type of the file.
#             The depth<grid> dimension is not needed for files only containing 2D fields (e.g. ssh only, sea ice only).
# VARIABLES(DIMENSIONS):
# - time_counter(time_counter): time variable;
# - depth<grid>(depth<grid>): depth axis;
# - <var_3d>(time_counter,depth<grid>,y,x): 3D input data fields (e.g. temperature/salinity/baroclinic velocities);
# - <var_2d>(time_counter,y,x): 2D input data fields (e.g. sea ice, barotropic velocities, ssh).
# <var_3d> and <var_2d> should match the variable names given in the ftypes dictionary below.
# Input data fields should have been pre-drowned (i.e. no mask), with one exception: it's OK if the last few vertical levels are FULLY masked (e.g. at depths below your domain bathymetry). In that case, the script copies the deepest available level down to the levels below it (they probably won't be used by NEMO anyway, but still).
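# As a sketch, a matching grid_T input header could look like the following (dimension sizes are placeholders):
#   dimensions: time_counter = <nt> ; deptht = <nz> ; y = <ny> ; x = <nx> ;
#   variables:  time_counter(time_counter) ; deptht(deptht) ;
#               conservative_temperature(time_counter, deptht, y, x) ;
#               absolute_salinity(time_counter, deptht, y, x) ;
#               sossheig(time_counter, y, x) ;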
indata_folder = "/mfast/pelletie/oras5/preinterp"
outdata_folder = "/mfast/pelletie/oras5/formatted/rim9_masked"
# Input geometry file, coming from generate_new_bdy_geom.py
geom_file = "/mfast/pelletie/oras5/grids/eORCA025-SO_BDY-COORDS_masked_rim9.nc"
pref_in = "ORAS5_preinterp_opa1_"
pref_out = "ORAS5_opa1_BDYDTA_eORCA025-SO_rim9_masked_1m_"
# Inclusive year range, i.e. data for the stop_year file will also be generated.
start_year = 1979
stop_year = 1981
ftypes = OrderedDict()
# This is the dictionary that describes which variables are present in each 'ftype' type of file.
# The ftype key (the thing in the [] brackets after "ftypes") should be the <ftype> from the input filenames described above.
# 'grid' should be the type of gridpoints, which must be shared by all variables within one file (t, u, or v).
# In this example, I'm putting several variables within one given file, but I could have used distinct files for all variables. However, I could not have used one file for all variables, since not all variables are defined on the same t/u/v grid.
# 'variables' is another dictionary (nested within ftypes), listing the variable names contained in the input file.
# For each variable, you can optionally specify desired minimum and maximum values.
# Values outside of the min/max range will be clipped back to min/max.
ftypes['grid_T'] = { 'grid' : 't',
                     'variables' : {
                         'conservative_temperature' : { 'min' : -2. },
                         'absolute_salinity' : { 'min' : 25. },
                         'sossheig' : {}
                     }
                   }
ftypes['grid_U'] = { 'grid' : 'u',
                     'variables' : {
                         'uobarotropic' : { },
                         'uobaroclinic' : { } } }
ftypes['grid_V'] = { 'grid' : 'v',
                     'variables' : {
                         'vobarotropic' : { },
                         'vobaroclinic' : { } } }
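# As an illustration of the min/max clipping, a hypothetical extra entry for a sea-ice file on the T grid
# could look like this (the 'icemod' ftype and the variable names are placeholders, not prescribed by this script):
# ftypes['icemod'] = { 'grid' : 't',
#                      'variables' : {
#                          'siconc' : { 'min' : 0., 'max' : 1. },
#                          'sithick' : { 'min' : 0. } } }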

### END OF INPUT SPECIFICATION.

grids = []
for key in ftypes.keys():
    curr_grid = ftypes[key]['grid']
    if(curr_grid not in grids):
        grids.append(curr_grid)

geom_nc = Dataset(geom_file, mode='r')
nbis = {}
nbjs = {}
nbrs = {}
lons = {}
lats = {}
nbdys = {}
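
# Read the BDY point description from the geometry file for each grid type.
# nbi/nbj indices are 1-based in the file (NEMO/Fortran convention); subtract 1 to get 0-based NumPy indices.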
for grid in grids:
    nbis[grid] = geom_nc.variables['nbi'+grid][0,:] - 1
    nbjs[grid] = geom_nc.variables['nbj'+grid][0,:] - 1
    nbrs[grid] = geom_nc.variables['nbr'+grid][0,:]
    nbdys[grid] = len(geom_nc.dimensions['xb'+grid.upper()])
    lons[grid] = geom_nc.variables['glam'+grid][0,:]
    lats[grid] = geom_nc.variables['gphi'+grid][0,:]
geom_nc.close()

for year in range(start_year, stop_year+1):
    for ftype in ftypes.keys():

        cdict = ftypes[ftype]
        input_file = indata_folder+"/"+pref_in+ftype+"_%d"%year+".nc"
        output_file = outdata_folder+"/"+pref_out+ftype+"_y%d"%year+".nc"
        print('Generating ', output_file)
        start = time.time()

        in_nc = Dataset(input_file, mode='r')
        out_nc = Dataset(output_file, mode='w')

        grid = cdict['grid']
        nt = len(in_nc.dimensions['time_counter'])
        ny = len(in_nc.dimensions['y'])
        nx = len(in_nc.dimensions['x'])
        has_3d = False
        nxb = nbdys[grid]

        for name, dim in in_nc.dimensions.items():
            if(name == 'depth'+grid):
                has_3d = True
                nz = len(dim)
                break
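
        # Output BDY layout: a degenerate 'yb' dimension of size 1, an 'xb<GRID>' dimension holding the BDY points,
        # and an unlimited time_counter (size 0 means unlimited in netCDF4).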
        out_nc.createDimension('yb', 1)
        out_nc.createDimension('xb'+grid.upper(), nxb)
        out_nc.createDimension('time_counter', 0)
        if(has_3d):
            out_nc.createDimension('depth'+grid, nz)
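
        # Flat indices into the flattened (time_counter, y, x) input array, selecting the (nbj, nbi) BDY cells
        # for every time record: index = t*ny*nx + nbj*nx + nbi.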
        idx_flat_2d = np.repeat( np.arange(nt) * ny*nx, nxb ) \
                      + np.tile(nbjs[grid] * nx, nt) \
                      + np.tile(nbis[grid], nt)

        time_in = in_nc.variables['time_counter']
        time_out = out_nc.createVariable(varname = 'time_counter', datatype = time_in.datatype, dimensions = time_in.dimensions)
        time_out.setncatts(time_in.__dict__)
        time_out[:] = time_in[:]

        nbi_out = out_nc.createVariable(varname='nbi'+grid, datatype='i4', dimensions=['yb', 'xb'+grid.upper()])
        nbi_out[0,:] = nbis[grid][:] + 1
        nbj_out = out_nc.createVariable(varname='nbj'+grid, datatype='i4', dimensions=['yb', 'xb'+grid.upper()])
        nbj_out[0,:] = nbjs[grid][:] + 1
        nbr_out = out_nc.createVariable(varname='nbr'+grid, datatype='i4', dimensions=['yb', 'xb'+grid.upper()])
        nbr_out[0,:] = nbrs[grid][:]

        if(has_3d):
            depth_in = in_nc.variables['depth'+grid]
            depth_out = out_nc.createVariable(varname = 'depth'+grid, datatype = depth_in.datatype, dimensions = depth_in.dimensions)
            depth_out.setncatts(depth_in.__dict__)
            depth_out[:] = depth_in[:]
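
            # Same gather indices for 3D fields, over the flattened (time_counter, depth, y, x) array:
            # index = (t*nz + k)*ny*nx + nbj*nx + nbi for every (time, level) pair and BDY point.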
            idx_flat_3d = np.repeat( np.arange(nt*nz) * ny*nx, nxb ) \
                          + np.tile(nbjs[grid] * nx, nt*nz) \
                          + np.tile(nbis[grid], nt*nz)

        lon_out = out_nc.createVariable(varname='nav_lon', datatype='f8', dimensions=['yb', 'xb'+grid.upper()])
        lon_out.standard_name = "longitude"
        lon_out.units = "degrees East"
        lon_out[0,:] = lons[grid][:]
        lat_out = out_nc.createVariable(varname='nav_lat', datatype='f8', dimensions=['yb', 'xb'+grid.upper()])
        lat_out.standard_name = "latitude"
        lat_out.units = "degrees North"
        lat_out[0,:] = lats[grid][:]

        for var in cdict['variables']:

            in_var = in_nc.variables[var]

            if( 'depth'+grid in in_var.dimensions ):
                out_var = out_nc.createVariable(var, datatype='f4', \
                                                dimensions=['time_counter', 'depth'+grid, 'yb', 'xb'+grid.upper()], fill_value = 1.e20)
                out_var[:,:,0,:] = np.reshape( (np.ndarray.flatten(in_var[:]))[idx_flat_3d], \
                                               (nt, nz, nxb) )
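
                # The input may legitimately have its deepest level(s) fully masked (below the source bathymetry).
                # In that case, fill those levels by copying the deepest valid level downwards; any other
                # masking pattern means the input was not properly pre-drowned, which is a fatal error.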
                if(np.ma.is_masked(out_var[:])):
                    n_msk = np.count_nonzero(out_var[:].mask)
                    all_masked_lev = np.all(out_var[:].mask, axis=(0,2,3))
                    if(np.any(all_masked_lev)):
                        idx_start_bot = np.amin((np.nonzero(all_masked_lev))[0])
                        nbot = nz - idx_start_bot
                        if( nbot * nt * nxb == n_msk ):
                            out_var[:,idx_start_bot:,0,:] = (out_var[:,idx_start_bot-1,0,:])[:,None,:] * (np.ones(shape=[nbot], dtype=float))[None,:,None]
                        else:
                            print('ERROR: it does not look like you have properly pre-drowned ', var, 'in ', input_file)
                            sys.exit()
                    else:
                        print('ERROR: it does not look like you have properly pre-drowned ', var, 'in ', input_file)
                        sys.exit()
            else:
                out_var = out_nc.createVariable(var, datatype='f4', \
                                                dimensions=['time_counter', 'yb', 'xb'+grid.upper()], \
                                                fill_value = 1.e20)
                out_var[:,0,:] = np.reshape( (np.ndarray.flatten(in_var[:]))[idx_flat_2d], \
                                             (nt, nxb) )
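
            # Optional clipping to the 'min'/'max' bounds declared for this variable in ftypes.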
            if( 'min' in cdict['variables'][var].keys() ):
                out_var[:] = np.clip(out_var[:], \
                                     a_min = cdict['variables'][var]['min'], \
                                     a_max = None)
            if( 'max' in cdict['variables'][var].keys() ):
                out_var[:] = np.clip(out_var[:], \
                                     a_max = cdict['variables'][var]['max'], \
                                     a_min = None)

        in_nc.close()
        out_nc.close()

        stop = time.time()
        print('Done in %.2f'%(stop-start)+'s')
        print('')