from netCDF4 import Dataset
import sys
import time
import numpy as np
from collections import OrderedDict

# Script for generating NEMO BDY data files from pre-interpolated data, and a NEMO BDY geometry file.
# Author: C. Pelletier

# Inputs:
# 1) NEMO BDY geometry description file (also needed by NEMO itself for a BDY-including run),
# can be generated from generate_new_bdy_geom.py

# 2) A set of external data that has already been pre-interpolated to the full 2D NEMO grid you want to run at.
# It's a huge overkill in terms of data (you'll need <1% of the actual full data), but it really is simpler to implement.
# It's ok to have data that is gibberish outside of the vicinity of the NEMO BDY (but do take a margin, e.g. on the 2*nbr slices near BDY, have real data), 
# but the data should be on the full NEMO domain, and should have been "drowned" (i.e., all potentially masked cells should have been filled up, e.g. with nearest-neighbors)

# Input data nomenclature
# filename: <indata_folder>/<pref_in><ftype>_<year>.nc
# WHERE:
# a) <indata_folder>: input data folder, defined below
# b) <pref_in>: input prefix, defined below
# c) <ftype>: "filetype". I typically use grid_T, grid_U and grid_V. The NEMO grid uses slightly staggered grids for thermodynamics ("grid_T"), x-direction dynamics ("grid_U"), and y-direction dynamics ("grid_V"). Your input pre-interpolated fields should have been interpolated to distinct grids, depending on their nature:
#   c.i) T-grid for temperature, salinity, sea surface height, sea-ice concentration, sea-ice thickness, snow over sea-ice;
#   c.ii) U-grid for x-direction ocean velocities (barotropic and baroclinic velocities, and potentially sea-ice U velocities)
#   c.iii) V-grid for y-direction ocean velocities (barotropic and baroclinic velocities, and potentially sea-ice V velocities)
#   The names of 'ftype' does not matter, it can be called whatever, but they should be consistent with the ftypes dictionary defined (and described) below.

# Input data netCDF content:
# DIMENSIONS: must be time_counter, depth<grid>, y, x
# where <grid> is t/u/v, depending on the gridtype of the file.
# The depth<grid> dimension is not needed for files only containing 2D fields (e.g. ssh only, sea ice only)
# VARIABLES(DIMENSIONS):
#  - time_counter(time_counter): time variable;
#  - depth<grid>(depth<grid>): depth axis
#  - <var_3d>(time_counter,depth<grid>,y,x): 3D-variable for input data fields (e.g. temp/sal/baroclinic velocities)
#  - <var_2d>(time_counter,y,x): 2D-variable for input data fields (e.g. sea ice, barotropic velocities, ssh)
# <var_3d> and <var_2d> should match the variable names given in the ftypes dictionary below.
# Input data fields should have been pre-drowned (i.e. no mask) with one exception: it's ok to have the last few vertical levels that are FULLY masked (e.g., at depths which are below you domain bathymetry). In that case, the script will just copy the deepest available level to the levels below it (and it probably won't be used in NEMO anyway but still).


indata_folder="/mfast/pelletie/oras5/preinterp"
outdata_folder="/mfast/pelletie/oras5/formatted/rim9_masked"

# Input geometry file, coming from generate_new_bdy_geom.py
geom_file = "/mfast/pelletie/oras5/grids/eORCA025-SO_BDY-COORDS_masked_rim9.nc"

pref_in = "ORAS5_preinterp_opa1_"
pref_out = "ORAS5_opa1_BDYDTA_eORCA025-SO_rim9_masked_1m_"

# Inclusive year range, i.e., data from stop_year file will also be generated.
start_year = 1979
stop_year = 1981


ftypes = OrderedDict()
# This is the dictinary that described which variables are present in each 'ftype' type of files.
# The ftype key (thing in the [] brackets after "ftypes") should be the <ftype> from the input filenames described above.
# 'grid' should be the type of gridpoints, that must be shared for all variables within one file (t, u, or v)
# In this example, I'm putting several variables within one given file, but I could have used distinct files for all variables. However, I could not have used one file for all variables, since all variables are not defined on the same t/u/v grids.
# 'variables' is another dictionary (nested within ftypes), listed the variable names contained within the input file.
# for each variable, you can specify desired minimum and maximum values for the said variable.
# Values outside of the min/max range will be brought back to min/max 

ftypes['grid_T'] = { 'grid' : 't', \
                     'variables' : {
                         'conservative_temperature' : { 'min' : -2. },
                         'absolute_salinity' : { 'min' : 25. },
                         'sossheig'          : {} \
                                   } \
                   }

ftypes['grid_U'] = { 'grid' : 'u', \
                    'variables' : {
                        'uobarotropic' : { },
                        'uobaroclinic' : { }}}

ftypes['grid_V'] = { 'grid' : 'v', \
                     'variables' : {
                     'vobarotropic' : { },
                     'vobaroclinic' : { }}}


### END OF INPUT SPECIFICATION.


grids = []

for key in ftypes.keys():
    curr_grid = ftypes[key]['grid']
    if(curr_grid not in grids):
        grids.append(curr_grid)


geom_nc = Dataset(geom_file, mode='r')

nbis = {}
nbjs = {}
nbrs = {}
lons = {}
lats = {}
nbdys = {}

for grid in grids:
    nbis[grid] = geom_nc.variables['nbi'+grid][0,:] - 1
    nbjs[grid] = geom_nc.variables['nbj'+grid][0,:] - 1
    nbrs[grid] = geom_nc.variables['nbr'+grid][0,:]
    nbdys[grid] = len(geom_nc.dimensions['xb'+grid.upper()])
    lons[grid] = geom_nc.variables['glam'+grid][0,:]
    lats[grid] = geom_nc.variables['gphi'+grid][0,:]

geom_nc.close()


for year in range(start_year,stop_year+1):
    for ftype in ftypes.keys():

        cdict = ftypes[ftype]

        input_file = indata_folder+"/"+pref_in+ftype+"_%d"%year+".nc"
        output_file = outdata_folder+"/"+pref_out+ftype+"_y%d"%year+".nc"

        print('Generating ', output_file)
        start = time.time()
        
        in_nc = Dataset(input_file, mode='r')
        out_nc = Dataset(output_file, mode='w')

        grid = cdict['grid']
        nt = len(in_nc.dimensions['time_counter'])

        ny = len(in_nc.dimensions['y'])
        nx = len(in_nc.dimensions['x'])
        
        has_3d = False
        nxb = nbdys[grid]
        
        for name, dim in in_nc.dimensions.items():
            if(name == 'depth'+grid):
                has_3d = True
                nz = len(dim)
                break

        out_nc.createDimension('yb', 1)
        out_nc.createDimension('xb'+grid.upper(), nxb)
        out_nc.createDimension('time_counter', 0)
        if(has_3d):
            out_nc.createDimension('depth'+grid, nz)

        idx_flat_2d = np.repeat( np.arange(nt) * ny*nx, nxb ) \
                      + np.tile(nbjs[grid] * nx, nt) \
                      + np.tile(nbis[grid], nt)

        time_in = in_nc.variables['time_counter']
        time_out = out_nc.createVariable(varname = 'time_counter', datatype = time_in.datatype, dimensions = time_in.dimensions)
        time_out.setncatts(time_in.__dict__)
        time_out[:] = time_in[:]

        nbi_out = out_nc.createVariable(varname='nbi'+grid, datatype='i4', dimensions=['yb', 'xb'+grid.upper()])
        nbi_out[0,:] = nbis[grid][:] + 1

        nbj_out = out_nc.createVariable(varname='nbj'+grid, datatype='i4', dimensions=['yb', 'xb'+grid.upper()])
        nbj_out[0,:] = nbjs[grid][:] + 1

        nbr_out = out_nc.createVariable(varname='nbr'+grid, datatype='i4', dimensions=['yb', 'xb'+grid.upper()])
        nbr_out[0,:] = nbrs[grid][:]
        
        if(has_3d):
            depth_in = in_nc.variables['depth'+grid]
            depth_out = out_nc.createVariable(varname = 'depth'+grid, datatype = depth_in.datatype, dimensions = depth_in.dimensions)
            depth_out.setncatts(depth_in.__dict__)
            depth_out[:] = depth_in[:]

            idx_flat_3d = np.repeat( np.arange(nt*nz) * ny*nx, nxb ) \
                          + np.tile(nbjs[grid] * nx, nt*nz) \
                          + np.tile(nbis[grid], nt*nz)


        lon_out = out_nc.createVariable(varname='nav_lon', datatype='f8', dimensions=['yb', 'xb'+grid.upper()])
        lon_out.standard_name = "longitude"
        lon_out.units = "degrees East"
        lon_out[0,:] = lons[grid][:]
            
        lat_out = out_nc.createVariable(varname='nav_lat', datatype='f8', dimensions=['yb', 'xb'+grid.upper()])
        lat_out.standard_name = "latitude"
        lat_out.units = "degrees North"
        lat_out[0,:] = lats[grid][:]
        
        for var in cdict['variables']:
            in_var = in_nc.variables[var]

            
            if( 'depth'+grid in in_var.dimensions ):

                out_var = out_nc.createVariable(var, datatype='f4', \
                                                dimensions=['time_counter', 'depth'+grid, 'yb', 'xb'+grid.upper()], fill_value = 1.e20)


                out_var[:,:,0,:] = np.reshape( (np.ndarray.flatten(in_var[:]))[idx_flat_3d], \
                                               (nt, nz, nxb) )

                if(np.ma.is_masked(out_var[:])):
                    n_msk = np.count_nonzero(out_var[:].mask)
                    all_masked_lev = np.all(out_var[:].mask, axis=(0,2,3))
                    if(np.any(all_masked_lev)):
                        idx_start_bot = np.amin((np.nonzero(all_masked_lev))[0])
                        nbot = nz - idx_start_bot
                        if( nbot * nt * nxb == n_msk):
                            out_var[:,idx_start_bot:,0,:] = (out_var[:,idx_start_bot-1,0,:])[:,None,:] * (np.ones(shape=[nbot], dtype=float))[None,:,None]
                        else:
                            print('ERROR: it does not look like you have properly predrowned ', var, 'in ', input_file)
                            sys.exit()
                    else:
                        print('ERROR: it does not look like you have properly predrowned ', var, 'in ', input_file)
                        sys.exit()
                                            
                    
            else:
                out_var = out_nc.createVariable(var, datatype='f4', \
                                                dimensions=['time_counter', 'yb', 'xb'+grid.upper()], \
                                                fill_value = 1.e20)
                out_var[:,0,:] =  np.reshape( (np.ndarray.flatten(in_var[:]))[idx_flat_2d], \
                                               (nt, nxb) )
                

            if( 'min' in cdict['variables'][var].keys() ):
                out_var[:] = np.clip(out_var[:],\
                                     a_min = cdict['variables'][var]['min'], \
                                     a_max = None)


            if( 'max' in cdict['variables'][var].keys() ):
                out_var[:] = np.clip(out_var[:],\
                                     a_max = cdict['variables'][var]['max'], \
                                     a_min = None)

        
        in_nc.close()
        out_nc.close()

        stop = time.time()

        print('Done in %.2f'%(stop-start)+'s')
        print('')