mkpert.R 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. rm(list = ls())
  2. graphics.off()
  3. library(ncdf)
  4. # =============================================================================
  5. # Author - François Massonnet
  6. #
  7. # Date - 02 February 2016, Candelaria!
  8. # https://es.wikipedia.org/wiki/Fiesta_de_la_Candelaria
  9. # - 22 June 2016: enhancement to make consistent perturbations across
  10. # several variables
  11. # - 1st July 2016: Simplification. It is indeed not necessary to do
  12. # a SVD here. By multiplying the available perturbations by
  13. # z / sqrt(N - 1) we create data with the same cov. matrix.
  14. # See also M. Meinvielle's thesis:
  15. # https://hal.inria.fr/file/index/docid/681484/filename/24564_MEINVIELLE_2012_archivage.pdf
  16. # Goal - Create perturbed versions of the fields of DFS5.2. As many
  17. # perturbations as the user wants can be created, but they will remain
  18. # in the subspace spanned by the available perturbations (computed in
  19. # the first script preprocess.bash)
  20. # A seed is used to ensure reproducibility of perturbations.
  21. #
  22. # Input - Files of perturbations must be available. They can be computed
  23. # using the script preprocess.bash
  24. # =============================================================================
  25. strvar <- c('t2', 'qsw', 'qlw') # Name of the variables for which perturbations
  26. # have to be created.
  27. fstrvar <- c('t2', 'radsw', 'radlw') # Matching name of the variable as they
  28. # appear in the NetCDF
  29. # Normally equal to strvar, except
  30. # qsw --> radsw and qlw --> radlw
  31. # The folder where the perturbations are (the folder was created during
  32. # the execution of preprocess.bash)
  33. pertdir <- c('/esnas/scratch/fmassonn/TMP/TMP_t2_15262',
  34. '/esnas/scratch/fmassonn/TMP/TMP_qsw_26904' ,
  35. '/esnas/scratch/fmassonn/TMP/TMP_qlw_15266')
  36. yearbp <- 1979 # The years defining the time period used
  37. # to compute perturbations. That is, the reference period from
  38. # which variability is estimated.
  39. yearep <- 2015 # That period must be included in the period for which perturbations were
  40. # created (in preprocess.bash)
  41. yearb <- 1973 # The years for which a new perturbation has to be calculated
  42. yeare <- 1973 # Can be outside the interval above
  43. npert <- 25 # Number of perturbations that have to be created each year
  44. # ---------------------------------------------
  45. # Script starts -------------------------------
  46. # ---------------------------------------------
  47. nvar <- length(strvar)
  48. nyearp <- yearep - yearbp + 1 # Number of years for the reference period
  49. nyear <- yeare - yearb + 1 # Number of years for which to create perturbations
  50. nsamp <- nyearp - 1 # Number of perturbations samples available.
  51. # Since we create
  52. # the perturbations as successive year-to-year diffs,
  53. # we have one less than the number of years
  54. for (jvar in seq(1, nvar)) {
  55. print(paste("Working on variable", strvar[jvar]))
  56. # 1/ Recording the perturbations from the available sample
  57. # --------------------------------------------------------
  58. for (year in seq(yearbp + 1, yearep)) {
  59. print(paste(" Reading data:", year, "-", year - 1))
  60. filein <- paste(pertdir[jvar], "/", "diff_", strvar[jvar], "_DFS5.2_day_", year,
  61. "-", year - 1, ".nc", sep = "")
  62. f <- open.ncdf(filein, mode = 'r')
  63. var <- get.var.ncdf(f, fstrvar[jvar])
  64. if (year == yearbp + 1) {
  65. # If we are in the first year
  66. # let's also create the matrix that will receive the data
  67. ny <- dim(var)[1]
  68. nx <- dim(var)[2]
  69. nt <- dim(var)[3]
  70. ns <- ny * nx * nt # state vector dimension
  71. lat0 <- get.var.ncdf(f, 'lat0')
  72. lon0 <- get.var.ncdf(f, 'lon0')
  73. print(" Creating X_raw, this has to be done only once")
  74. X_raw <- matrix(NA, ns, nsamp)
  75. print(" X_raw created")
  76. } # if
  77. close.ncdf(f)
  78. var[is.na(var)] <- 0
  79. # This last step is necessary, because the DFS5.2 forcing set has, sometimes,
  80. # NaNs. (See e.g., year 2008, variable t2, time step 1776, in the Southern)
  81. X_raw[, year - (yearbp + 1) + 1] <- var[]
  82. rm(list = "var")
  83. } # year
  84. # Centering X
  85. # -----------
  86. print("Centering the data")
  87. myones <- matrix(1, nrow = nsamp, ncol = 1)
  88. Xbar <- 1 / nsamp * X_raw %*% myones
  89. X <- X_raw - Xbar %*% t(myones)
  90. # Clearing X_raw and Xbar, as they are taking resources!
  91. rm(list = c("X_raw", "Xbar", "myones"))
  92. print("Data centered")
  93. # Creating perturbations
  94. # ----------------------
  95. # We take a N(0,1) random vector z of size nsamp by 1
  96. # so that the product X %*% z / sqrt(nsamp - 1)
  97. # will give us a new perturbation
  98. # with the desired properties
  99. print("Creating perturbations")
  100. for (year in seq(yearb, yeare)) {
  101. print(paste("Recording perturbations for variable", strvar[jvar], "and year", year))
  102. for (jpert in seq(1, npert)) {
  103. print(paste(" Recording perturbation for member", sprintf("%02d", jpert)))
  104. # EXTREMELY IMPORTANT - this ensures reproducibility. DON'T CHANGE.
  105. # if year 1987, and perturbation 302:
  106. # then the seed is 1000 * 1987 + 302 = 1987302.
  107. # This is then a unique identifier. It won't be unique if we go beyond 999
  108. # members, but I might no longer be on this planet when that happens.
  109. #
  110. # In addition the value of z must be the same for all variables to have coherent
  111. # perturbations
  112. set.seed(year * 1000 + jpert)
  113. z <- matrix(rnorm(nsamp))
  114. sample <- array(X %*% z / sqrt(nsamp - 1), c(ny, nx, nt))
  115. print("First and last elements of perturbation: ")
  116. print(z[1])
  117. print(z[nsamp])
  118. rm(list = "z")
  119. # Record in NetCDF
  120. # ----------------
  121. # 1/ Create dimensions
  122. # --------------------
  123. dimt <- dim.def.ncdf('time', 'time', seq(1, nt), unlim = TRUE)
  124. dimy <- dim.def.ncdf('lon0', 'lon0', lon0)
  125. dimx <- dim.def.ncdf('lat0', 'lat0', lat0)
  126. # 2/ Define variables
  127. # -------------------
  128. mylist <- list()
  129. mylist[[1]] <- var.def.ncdf(fstrvar[jvar], '-', list(dimy, dimx, dimt), -1e24)
  130. # 3/ Create NetCDF
  131. # ----------------
  132. fileout <- paste(pertdir[jvar], '/pert_', strvar[jvar], '_DFS5.2_', year, '_fc', sprintf("%02d", jpert), '_ref', yearbp, '-', yearep, '.nc', sep = "")
  133. fo <- create.ncdf(fileout, mylist)
  134. close.ncdf(fo)
  135. # 4/ Open it
  136. # ----------
  137. fo <- open.ncdf(fileout, write = TRUE)
  138. # 5/ Put variable
  139. # ---------------
  140. put.var.ncdf(fo, fstrvar[jvar], sample)
  141. # 6/ Close it
  142. # -----------
  143. close.ncdf(fo)
  144. print(paste(fileout, "created"))
  145. # 7/ Clear workspace
  146. # ------------------
  147. rm(list = "sample")
  148. } # jpert
  149. } # year
  150. } # var