12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982 |
CFU_PCA <- function(data, cl = 0.98, ofiles = FALSE) {
  #
  # Computes a Principal Component Analysis (PCA/EOF) of data.
  #
  # Description:
  #
  # Returns an array of EOFs, an array of standardized PCs, a vector of FVARs
  # (fractions of explained variance, in %), an array of CORRs (correlation
  # maps), and the threshold for the correlation coefficient based on a
  # two-tailed Student's t-test.
  # The calculation is done by a singular value decomposition of the data
  # matrix (prcomp), not by 'eigen' on the covariance matrix.
  #
  # Arguments:
  #
  # data: Array containing the anomalies field, either 3-D (lon, lat, time)
  #       or 2-D (space, time). Gridpoints that are NA at the first time
  #       step are excluded from the analysis.
  #
  # cl: Confidence level for the correlation threshold (two-tailed
  #     Student's t-test). Default 0.98 (98% confidence; 2% significance).
  #
  # ofiles: Logical; currently unused (the ascii output code is disabled).
  #
  # Output:
  #
  # $PCs:   Array of standardized principal components (nt, nm)
  # $FVARs: Vector of fractions of explained variance in % (nm)
  # $EOFs:  Array of empirical orthogonal functions (nlon, nlat, nm)
  # $CORRs: Array of correlation maps associated with EOFs (nlon, nlat, nm)
  # $Rtest: Statistically significant correlation coefficient at cl
  #
  # Author:
  #
  # Javier Garcia-Serrano <jgarcia@ic3.cat> 17 September 2010
  # Modified (29 August 2011): SVD of the data matrix (prcomp) instead of
  # 'eigen' on the covariance matrix (princomp).
  #
  d <- dim(data)
  if (length(d) == 3) {
    nlon <- d[1]; nlat <- d[2]; nt <- d[3]
  } else {
    nlon <- 1; nlat <- d[1]; nt <- d[2]
  }
  ns <- nlon * nlat
  # Flatten the spatial dimensions into a single 'ns' axis (GrADS ordering).
  # R arrays are column-major, so this is a plain reshape; it also fixes the
  # 2-D input case, which the previous per-latitude loop could not handle.
  M <- array(data, dim = c(ns, nt))
  # Keep only gridpoints with data (NA screening on the first time step).
  yes <- which(!is.na(M[, 1]))
  counter <- length(yes)
  Myes <- M[yes, , drop = FALSE]  # (ns_with_data, nt)
  # PCA via singular value decomposition of the data matrix.
  PCA <- prcomp(t(Myes))
  # Number of modes retained: at most 10 (approx. 80% of FVAR for typical
  # fields), fewer when the field provides fewer components.
  nm <- min(10, ncol(PCA$rotation))
  std <- PCA$sdev
  lambda <- std * std
  fvar <- round(lambda / sum(lambda), digits = 4)
  fvar <- fvar[1:nm] * 100
  Un <- PCA$rotation[, 1:nm, drop = FALSE]
  # Project the data onto the leading EOFs, then standardize each PC by its
  # sample standard deviation.
  PC <- t(Myes) %*% Un  # (nt, nm)
  PCstd <- sweep(PC, 2, apply(PC, 2, sd), "/")
  # EOF patterns as regression of the field onto the standardized PCs
  # (divided by nt, as in the original formulation).
  EOFyes <- (Myes %*% PCstd) / nt  # (ns_with_data, nm)
  eof <- array(dim = c(ns, nm))
  eof[yes, ] <- EOFyes
  EOF <- array(eof, dim = c(nlon, nlat, nm))
  # Correlation maps: each gridpoint's time series vs each standardized PC.
  CORRyes <- cor(t(Myes), PCstd)  # (ns_with_data, nm)
  corr <- array(dim = c(ns, nm))
  corr[yes, ] <- CORRyes
  CORR <- array(corr, dim = c(nlon, nlat, nm))
  # Two-tailed t-test threshold for the correlation coefficient at level cl.
  cl2 <- (1 - cl) / 2
  t <- qt(cl + cl2, nt - 2)
  R <- sqrt((t * t) / ((t * t) + nt - 2))
  invisible(list(PCs = PCstd, FVARs = fvar, EOFs = EOF, CORRs = CORR, Rtest = R))
}
CFU_MCA<-function(datax, datay, cl = 0.98, ofiles = FALSE) {
#
# Computes a Maximum Covariance Analysis (MCA) between datax and datay.
#
# Description:
#
# Returns a vector of squared covariance fractions (SCFs) explained by each pair of
# covariability modes, a vector of correlation coefficients (RUVs) between expansion
# coefficients (ECs) that measures their linear relationship, and a set of regression (MCAs)
# and correlation (CORRs) maps associated with the covariability modes. Note that both
# MCAs and CORRs are 'homogeneous' patterns obtained as regression/correlation between
# each field (predictor, predictand) and its own expansion coefficient.
# The threshold for the correlation coefficient, based on a two-tailed Student's t-test,
# is also produced.
#
# Usage:
#
# CFU_MCA(datax, datay, cl = 0.98, ofiles = FALSE)
#
# Arguments:
#
# datax: Array containing the anomalies field for the predictor
#
# datay: Array containing the anomalies field for the predictand
#
# cl: Value of the confidence level for the correlation map.
# It is based on a two-tailed Student's t-test.
# Default is cl=0.98 (98% confidence level; 2% significance level).
#
# ofiles: Logical; currently unused (the ascii output code is disabled).
#
# Output:
#
# $SCFs: Vector of squared covariance fractions (nm)
# $RUVs: Correlations between expansion coefficients (stored as a 1 x nm matrix)
# $ECs_U: Array of expansion coefficients of predictor field (nt, nm)
# $MCAs_U: Array of covariability patterns of predictor field (c(dim), nm)
# $CORRs_U:Array of corr. maps associated with predictor field (c(dim), nm)
# $ECs_V: Array of expansion coefficients of predictand field (nt, nm)
# $MCAs_V: Array of covariability patterns of predictand field (c(dim), nm)
# $CORRs_V:Array of corr. maps associated with predictand field (c(dim), nm)
# $Rtest: Value of the statistically significant corr. coefficient at cl
#
# Author:
#
# Javier Garcia-Serrano <jgarcia@ic3.cat> 24 September 2010
#
# NOTE(review): both fields are assumed to share the same time length nt;
# nt is silently overwritten by datay's time dimension — confirm callers
# always pass matching records.
# Reshape the predictor field to (space, time), one latitude block at a time
# (GrADS ordering).
# NOTE(review): for 2-D input (length(dx)==2) the loop below indexes
# datax[,i,] and will fail — only 3-D (lon, lat, time) input works here.
dx=dim(datax)
if (length(dx) == 3){
nlonx=dx[1]; nlatx=dx[2]; nsx=nlonx*nlatx; nt=dx[3]
} else {
nlonx=1; nlatx=dx[1]; nsx=nlonx*nlatx; nt=dx[2]
}
X=array(dim=c(nsx,nt))
for (i in 1:nlatx){
X[(nlonx*(i-1)+1):(nlonx*(i-1)+nlonx),]=datax[,i,]
}
# Screen out gridpoints that are NA at the first time step.
# (The 'FALSE' string comparison works via logical-to-character coercion.)
not=is.na(X[,1])
yesx=array(dim=c(nsx))
counterx=0
for (i in 1:nsx){
if (not[i]=='FALSE'){counterx=counterx+1; yesx[counterx]=i}
}
yesx=yesx[1:counterx]
Xyes=X[yesx,] #(nsx_with_data,nt)
# Same reshape and NA screening for the predictand field.
dy=dim(datay)
if (length(dy) == 3){
nlony=dy[1]; nlaty=dy[2]; nsy=nlony*nlaty; nt=dy[3]
} else {
nlony=1; nlaty=dy[1]; nsy=nlony*nlaty; nt=dy[2]
}
Y=array(dim=c(nsy,nt))
for (i in 1:nlaty){
Y[(nlony*(i-1)+1):(nlony*(i-1)+nlony),]=datay[,i,]
}
not=is.na(Y[,1])
yesy=array(dim=c(nsy))
countery=0
for (i in 1:nsy){
if (not[i]=='FALSE'){countery=countery+1; yesy[countery]=i}
}
yesy=yesy[1:countery]
Yyes=Y[yesy,] #(nsy_with_data,nt)
# Cross-covariance matrix between the two fields (sample, n-1 denominator).
covXY=(Xyes%*%t(Yyes))/(nt-1)
# MCA: singular value decomposition of the cross-covariance matrix.
MCA=svd(covXY)
#outputs
# NOTE(review): nm is hard-coded; svd returns at most min(counterx, countery)
# modes, so fields with fewer than 10 valid points would break the
# subsetting below — confirm against expected grid sizes.
nm=10 #number of modes to be retained
SV=MCA$d #singular values
SV2=SV*SV; S=sum(SV2)
SCF=SV2/S #squared covariance fraction
SCF=SCF[1:nm]*100
Ux=MCA$u
Vy=MCA$v
Un=Ux[,1:nm]; Vn=Vy[,1:nm]
# Expansion coefficients: projection of each field onto its singular vectors,
# then standardized column-wise by the sample standard deviation.
ECu=t(Xyes)%*%Un; ECv=t(Yyes)%*%Vn #(nt,nm)
ECu_std=array(dim=c(nt,nm))
ECv_std=array(dim=c(nt,nm))
for (i in 1:nm){
Su=sd(ECu[,i]); ECu_std[,i]=ECu[,i]/Su
Sv=sd(ECv[,i]); ECv_std[,i]=ECv[,i]/Sv
}
# Correlation between paired expansion coefficients (one value per mode).
RUV=array(dim=c(1,nm))
for (i in 1:nm){
RUV[i]=cor(ECu_std[,i],ECv_std[,i])
}
# Homogeneous regression maps: field regressed onto its standardized ECs
# (divided by nt, as in CFU_PCA).
Uyes=(Xyes%*%ECu_std)/nt #(ns_with_data,nm)
Vyes=(Yyes%*%ECv_std)/nt
# Scatter back onto the full grid (NA where no data) and reshape to 3-D.
mcaU=array(dim=c(nsx,nm)); mcaU[yesx,]=Uyes
mcaV=array(dim=c(nsy,nm)); mcaV[yesy,]=Vyes
MCAU=array(dim=c(nlonx,nlatx,nm))
MCAV=array(dim=c(nlony,nlaty,nm))
for (i in 1:nlatx){
MCAU[,i,]=mcaU[(nlonx*(i-1)+1):(nlonx*(i-1)+nlonx),]
}
for (i in 1:nlaty){
MCAV[,i,]=mcaV[(nlony*(i-1)+1):(nlony*(i-1)+nlony),]
}
# Homogeneous correlation maps: gridpoint time series vs standardized ECs.
CORRUyes=array(dim=c(counterx,nm))
CORRVyes=array(dim=c(countery,nm))
for (i in 1:nm){
for (j in 1:counterx){
CORRUyes[j,i]=cor(Xyes[j,],ECu_std[,i])
}
for (j in 1:countery){
CORRVyes[j,i]=cor(Yyes[j,],ECv_std[,i])
}
}
corrU=array(dim=c(nsx,nm)); corrU[yesx,]=CORRUyes
corrV=array(dim=c(nsy,nm)); corrV[yesy,]=CORRVyes
CORRU=array(dim=c(nlonx,nlatx,nm))
CORRV=array(dim=c(nlony,nlaty,nm))
for (i in 1:nlatx){
CORRU[,i,]=corrU[(nlonx*(i-1)+1):(nlonx*(i-1)+nlonx),]
}
for (i in 1:nlaty){
CORRV[,i,]=corrV[(nlony*(i-1)+1):(nlony*(i-1)+nlony),]
}
# Two-tailed t-test threshold for the correlation coefficient at level cl.
cl=cl; cl2=(1-cl)/2
t=qt(cl+cl2, nt-2); R=sqrt((t*t)/((t*t)+nt-2))
# Disabled ascii output (would be controlled by 'ofiles').
#if(ofiles) {
# save(SCF, ascii=TRUE, file="$plotsdir/SCFs_of_data.dat")
# save(RUV, ascii=TRUE, file="$plotsdir/RUVs_of_data.dat")
# save(ECu_std, ascii=TRUE, file="$plotsdir/ECsU_of_data.dat")
# save(ECv_std, ascii=TRUE, file="$plotsdir/ECsV_of_data.dat")
#}
invisible(list(SCFs=SCF,RUVs=RUV,ECs_U=ECu_std,MCAs_U=MCAU,CORRs_U=CORRU,ECs_V=ECv_std,MCAs_V=MCAV,CORRs_V=CORRV,Rtest=R))
}
CFU_lonlat2ns <- function(data, miss.val = FALSE) {
  #
  # Reshape a 3-D array into a 2-D array, handling only the spatial dimensions.
  #
  # Description:
  #
  # Returns a 2-D view of 3-D data, placing the spatial dimensions into a
  # single 'ns' vector according to GrADS ordering, and keeps the temporal
  # dimension unchanged. Gridpoints that are NA at the first time step are
  # dropped from the returned matrix (unless miss.val = TRUE), and their
  # positions are reported through 'index_noNaN'.
  #
  # Usage: CFU_lonlat2ns(data, miss.val = FALSE)
  #
  # Input:
  #
  # data: 3-dimensional array, e.g. (longitude, latitude, time)
  #
  # miss.val: Logical; if TRUE the full matrix is returned, including the
  #           NA gridpoints.
  #
  # Output:
  #
  # $matrix:      2-D array (ns, time); ns excludes NA points unless miss.val
  # $nt:          number of time steps
  # $ns_original: nlon*nlat
  # $ns_noNaN:    number of gridpoints with data
  # $index_noNaN: positions (in 1..ns) of the gridpoints with data
  #
  # Authors:
  #
  # Javier Garcia-Serrano <jgarcia@ic3.cat> 30 September 2010
  d <- dim(data)
  nlon <- d[1]; nlat <- d[2]; ns <- nlon * nlat; nt <- d[3]
  # Column-major reshape is exactly the per-latitude-block (GrADS) ordering.
  M <- array(data, dim = c(ns, nt))
  # Gridpoints with data: NA screening on the first time step.
  yes <- which(!is.na(M[, 1]))
  counter <- length(yes)
  Myes <- M[yes, , drop = FALSE]  # (ns_with_data, nt)
  # Report total and valid gridpoint counts on screen (kept for parity with
  # historical behavior).
  print(ns)
  print(counter)
  # Optionally return the full matrix, NA points included.
  if (miss.val) {
    Myes <- M
  }
  invisible(list(matrix = Myes, nt = nt, ns_original = ns, ns_noNaN = counter,
                 index_noNaN = yes))
}
CFU_ns2lonlat <- function(data, nlon = 1, nlat = 1, yes = NULL) {
  #
  # Reshape a 2-D (ns, time) array back into a 3-D (lon, lat, time) array.
  #
  # Description:
  #
  # Inverse of CFU_lonlat2ns: expands the 'ns' spatial vector (GrADS
  # ordering) back into (nlon, nlat) and keeps the temporal dimension
  # unchanged. If 'yes' is given, 'data' is assumed to contain only the
  # valid gridpoints listed in 'yes' and the remaining positions are NA.
  #
  # Usage: CFU_ns2lonlat(data, nlon, nlat, yes = NULL)
  #
  # Input:
  #
  # data: 2-dimensional array (ns, time); when 'yes' is given, its rows
  #       correspond to the positions in 'yes'.
  #
  # nlon, nlat: target spatial dimensions (coerced to be at least 1).
  #
  # yes: optional vector with the positions (in 1..nlon*nlat) of the
  #      rows present in 'data' (output 'index_noNaN' of CFU_lonlat2ns).
  #
  # Output:
  #
  # 3-dimensional array (nlon, nlat, time)
  #
  # Authors:
  #
  # Javier Garcia-Serrano <jgarcia@ic3.cat> 06 October 2010
  nlon <- max(abs(nlon), 1)
  nlat <- max(abs(nlat), 1)
  ns <- nlon * nlat
  nt <- dim(data)[2]
  if (is.null(yes)) {
    # Full field supplied: a column-major reshape matches GrADS ordering.
    M <- array(data, dim = c(nlon, nlat, nt))
  } else {
    # Scatter the valid rows back onto the full grid (NA elsewhere), then
    # reshape.
    full <- array(dim = c(ns, nt))
    full[yes, ] <- data
    M <- array(full, dim = c(nlon, nlat, nt))
  }
  M
}
CFU_detrend <- function(vector) {
  #
  # Remove the linear trend from a time series.
  #
  # Description:
  #
  # Returns a vector with the same length as the input (nt) in which the
  # linear trend (least-squares fit against time index) has been removed.
  # If the input contains any missing value, an all-NA vector of length nt
  # is returned instead. (Previously only the first element was checked,
  # so interior NAs made lm() silently drop rows and the result came back
  # shorter than nt.)
  #
  # Usage: CFU_detrend(vector)
  #
  # Input:
  #
  # vector: 1-dimensional array, time (nt dimension)
  #
  # Output:
  #
  # 1-dimensional array without the linear trend (nt)
  #
  # Authors:
  #
  # Javier Garcia-Serrano <jgarcia@ic3.cat> 05 October 2010
  # Modified by jgarcia & vguemas@ic3.cat (27 April 2011): output converted
  # into a vector (before, a list).
  nt <- length(vector)
  if (anyNA(vector)) {
    print("vector contains missing values")
    detrend <- array(dim = nt)
  } else {
    # Residuals of the least-squares linear fit against the time index.
    detrend <- resid(lm(vector ~ seq_along(vector)))
  }
  as.vector(detrend)
}
CFU_testRR <- function(R1, N1, R2, N2, cl = 0.95) {
  #
  # Test the equality of correlation coefficients from independent samples.
  #
  # Description:
  #
  # Statistical test for the equality of correlation coefficients (R1, R2)
  # from two independent samples, e.g. a dynamical (GCM) and a statistical
  # prediction. Fisher z-transforms both correlations; the statistic Z
  # follows N(0,1) under H0. Two-tailed (bilateral) test:
  #
  #   H0: R1 == R2  vs.  H1: R1 =/= R2;  H0 rejected if |Z| > Z_alfa/2.
  #
  # Usage: CFU_testRR(R1, N1, R2, N2, cl)
  #
  # Input:
  #
  # R1, R2: sample correlations (must be strictly inside (-1, 1) for the
  #         Fisher transform to be finite)
  #
  # N1, N2: sample sizes (each must exceed 3)
  #
  # cl: confidence level; default 0.95 (95% confidence; 5% significance).
  #
  # Output:
  #
  # $value:   binary (0,1) / 0 = H0 accepted, 1 = H0 rejected; NA if Z is NA
  # $p_value: two-tailed p-value (area beyond |Z| on both sides of N(0,1))
  #
  # Authors:
  #
  # Javier Garcia-Serrano <jgarcia@ic3.cat> 02 December 2010
  # Modified by jgarcia & lrodrigues@ic3.cat (13 December 2011): corrected
  # p-values in the two-tailed test (alfa multiplied by 2).
  #
  # Fisher z-transform of both correlations.
  Y1 <- 0.5 * log((1 + R1) / (1 - R1))
  Y2 <- 0.5 * log((1 + R2) / (1 - R2))
  # Standard error of the difference; abs() makes the test two-tailed.
  B <- (1 / (N1 - 3)) + (1 / (N2 - 3))
  Z <- abs(Y1 - Y2) / sqrt(B)
  # Critical value at confidence level cl (two-tailed).
  cl2 <- (1 - cl) / 2
  Z_cl <- qnorm(cl + cl2)
  if (!is.na(Z)) {
    value <- if (Z > Z_cl) 1 else 0
    # Two-tailed p-value.
    p <- 2 * pnorm(Z, lower.tail = FALSE)
  } else {
    value <- NA
    p <- NA
  }
  invisible(list(value = value, p_value = p))
}
CFU_testR <- function(R, N, cl = 0.95, test = "two") {
  #
  # Test the null hypothesis of population correlation equal to zero.
  #
  # Description:
  #
  # Statistical test for a sample correlation coefficient (R) against the
  # null hypothesis of zero population correlation (RO):
  #
  #   H0: RO == 0  vs.  H1: RO =/= 0
  #
  # The statistic t follows a Student's t distribution with N-2 dof:
  #
  #   one-tailed: H0 rejected if  t  > t_alfa,N-2
  #   two-tailed: H0 rejected if |t| > t_alfa/2,N-2
  #
  # Usage: CFU_testR(R, N, cl, test = c("two", "one"))
  #
  # Input:
  #
  # R: sample correlation (must satisfy |R| < 1)
  #
  # N: number of elements in the sample
  #
  # cl: confidence level; default 0.95 (95% confidence; 5% significance).
  #
  # test: "one" (one-tailed, unilateral) or "two" (two-tailed, bilateral;
  #       default). Any other value raises an error (previously it produced
  #       an "object not found" failure because 'value'/'p' stayed unset).
  #
  # Output:
  #
  # $value:   binary (0,1) / 0 = H0 accepted, 1 = H0 rejected
  # $p_value: area under the Student's t density beyond the observed statistic
  #
  # Authors:
  #
  # Javier Garcia-Serrano <jgarcia@ic3.cat> 08 December 2010
  # Modified by jgarcia & lrodrigues@ic3.cat (13 July 2011): corrected
  # p-values in the two-tailed test (alfa multiplied by 2).
  test <- match.arg(test, c("two", "one"))
  t <- R * sqrt((N - 2) / (1 - R^2))
  if (test == "one") {
    t_cl <- qt(cl, N - 2)
    # NOTE(review): |t| is compared even in the one-tailed case, so strong
    # negative correlations are also flagged — kept as original behavior.
    value <- if (abs(t) > t_cl) 1 else 0
    p <- if (t > 0) {
      pt(t, N - 2, lower.tail = FALSE)
    } else {
      pt(t, N - 2, lower.tail = TRUE)
    }
  } else {
    cl2 <- (1 - cl) / 2
    t_cl <- qt(cl + cl2, N - 2)
    value <- if (abs(t) > t_cl) 1 else 0
    p <- if (t > 0) {
      pt(t, N - 2, lower.tail = FALSE) * 2
    } else {
      pt(t, N - 2, lower.tail = TRUE) * 2
    }
  }
  invisible(list(value = value, p_value = p))
}
CFU_AA <- function(data, lat, yes = NULL) {
  #
  # Compute the area-average of a 2-D (longitude x latitude) array.
  #
  # Description:
  #
  # Area-average (AA) of "data" over the spatial domain. The weights are the
  # cosine of the latitude at each gridpoint, i.e. all gridpoints on a
  # latitude circle (all longitudes) get the same weight, cos(lat).
  # NA gridpoints are excluded from both the numerator and the weight sum
  # (equivalent to weight = 0 for NA points).
  #
  # Usage: CFU_AA(data, lat, yes)
  #
  # Input:
  #
  # data: 2-dimensional array (longitude, latitude; in this order)
  #
  # lat: vector containing the latitude coordinates (degrees)
  #
  # yes: optional vector with the positions of non-NA ('good') values in
  #      ns = nlon*nlat GrADS ordering; computed from 'data' when NULL.
  #
  # Output:
  #
  # data_ave: value of the area-average
  #
  # Authors:
  #
  # Javier Garcia-Serrano <jgarcia@ic3.cat> 03 December 2010
  # Modified by Virginie Guemas <vguemas@ic3.cat> (March 2011):
  # weight = 0 for NA gridpoints.
  nlon <- dim(data)[1]
  nlat <- dim(data)[2]
  # cos(latitude) weights, identical along each latitude circle: row i of
  # 'weight' repeats ycos, so weight[i, j] = cos(lat[j]).
  ycos <- cos((lat * pi) / 180)
  weight <- matrix(ycos, nrow = nlon, ncol = nlat, byrow = TRUE)
  # Column-major flattening matches the GrADS ns ordering used by 'yes'.
  M <- as.vector(data * weight)
  w <- as.vector(weight)
  if (is.null(yes)) {
    # Derive the valid-point index from the (weighted) data itself.
    yes <- which(!is.na(M))
  }
  # Weighted mean over the valid gridpoints.
  sum(M[yes]) / sum(w[yes])
}
CFU_neff <- function(vector, fig = FALSE) {
  #
  # Compute the effective degrees of freedom of a time series.
  #
  # Description:
  #
  # Number of effective degrees of freedom of "vector", accounting for
  # autocorrelation. Based on: Zieba, A. (2010): Effective number of
  # observations and unbiased estimators of variance for autocorrelated
  # data - an overview. Metrol. Meas. Syst. Vol XVII, No. 1, pp. 3-16;
  # index 330930, ISSN 0860-8229. www.metrology.pg.gda.pl
  #
  # Usage: CFU_neff(vector)
  #
  # Input:
  #
  # vector: 1-dimensional array, time (nt dimension)
  #
  # fig: logical; if TRUE, the autocorrelation function of "vector" is plotted
  #
  # Output:
  #
  # neff: number of effective degrees of freedom
  #
  # Authors:
  #
  # Created by Caio A. S. Coelho <caio.coelho@cptec.inpe.br>
  # Implemented in CFU_Rfunc by Javier Garcia-Serrano <jgarcia@ic3.cat>
  # (August 2011)
  dof <- length(vector)
  # Sample autocorrelations at lags 1..dof-1.
  a <- acf(vector, lag.max = dof - 1, plot = fig)$acf[2:dof, 1, 1]
  # Triangular-weighted sum of squared autocorrelations (Zieba 2010),
  # vectorized over the lag index.
  k <- seq_len(dof - 1)
  s <- sum(((dof - k) / dof) * a^2)
  (dof / (1 + 2 * s)) - 1
}
CFU_BS <- function(obs,pred,thresholds = seq(0,1,0.1)) {
#
# Compute the BS and its decomposition given a set of probability predictions of a binary event.
#
# Description:
#
# Returns the values of the BS and its standard decomposition as well as the addition of the
# two within-bin extra components (Stephenson et al., 2008). It also solves the bias-corrected
# decomposition of the BS (Ferro and Fricker, 2012). BSS having the climatology as the reference
# forecast.
#
# Usage:
#
# CFU_BS(obs,pred,thresholds = seq(0,1,0.1))
#
# Input:
#
# obs: Vector of binary observations (1 or 0)
#
# pred: Vector of probabilistic predictions [0,1]
#
# thresholds: Values used to bin the forecasts. By default the bins are
# {[0,0.1), [0.1, 0.2), ... [0.9, 1]}.
#
# Output:
#
# $rel: standard reliability
# $res: standard resolution
# $unc: standard uncertainty
# $bs: Brier score
# $bs_check_res: rel-res+unc
# $bss_res: res-rel/unc
# $gres: generalized resolution
# $bs_check_gres: rel-gres+unc
# $bss_gres: gres-rel/unc
# $rel_bias_corrected: bias-corrected rel
# $gres_bias_corrected: bias-corrected gres
# $unc_bias_corrected: bias-corrected unc
# $bss_bias_corrected: gres_bias_corrected-rel_bias_corrected/unc_bias_corrected
# $nk: number of forecasts in each bin
# $fkbar: average probability of each bin
# $okbar: relative frequency that the observed event occurred in each bin
# $bins: bins used (lists of indices into pred/obs)
# $pred: the probability forecasts of the event (echoed input)
# $obs: the binary observations the forecasts are verified against (echoed input)
#
# References:
#
# Wilks (2006) Statistical Methods in the Atmospheric Sciences.
# Stephenson et al. (2008). Two extra components in the Brier score decomposition. Weather
# and Forecasting, 23: 752-757.
# Ferro and Fricker (2012). A bias-corrected decomposition of the BS. Quarterly Journal of
# the Royal Meteorological Society, DOI: 10.1002/qj.1924.
#
# Example:
#
# a=runif(10)
# b=round(a)
# x=CFU_BS(b,a)
# x$bs-x$bs_check_res
# x$bs-x$bs_check_gres
# x$rel_bias_corrected-x$gres_bias_corrected+x$unc_bias_corrected
#
# Author:
#
# Luis Rodrigues <lrodrigues@ic3.cat> 16 April 2012
#
# Input validation: on failure a message is printed and NULL is returned
# (invisibly), no error is raised.
# NOTE(review): the second check only fires when max(obs)!=1 AND min(obs)!=0,
# so e.g. obs=c(0,0.5) slips through — confirm whether this is intended.
if (max(pred) > 1 | min(pred) < 0) {
cat("Predictions outside [0,1] range. \n Are you certain this is a probability forecast? \n")
} else if (max(obs) != 1 & min(obs) != 0) {
cat("Binary events must be either 0 or 1. \n Are you certain this is a binary event? \n")
} else {
nbins=length(thresholds)-1 # Number of bins
n=length(pred)
# Assign each forecast to a bin; all bins are [lo, hi) except the last,
# which is [lo, hi] so that pred == 1 is included.
bins=as.list(paste("bin",1:nbins,sep=""))
for (i in 1:nbins) {
if (i == nbins) {
bins[[i]]=list(which(pred>=thresholds[i] & pred<=thresholds[i+1]))
} else {
bins[[i]]=list(which(pred>=thresholds[i] & pred<thresholds[i+1]))
}
}
#
# Per-bin forecast count (nk), mean forecast probability (fkbar) and
# observed relative frequency (okbar). Empty bins give 0/0 = NaN here;
# they are skipped in the sums below via the nk[i] > 0 guard.
fkbar=okbar=nk=array(0,dim=nbins)
for (i in 1:nbins) {
nk[i]=length(bins[[i]][[1]])
fkbar[i]=sum(pred[bins[[i]][[1]]])/nk[i]
okbar[i]=sum(obs[bins[[i]][[1]]])/nk[i]
}
#
# obar: climatological base rate. relsum/ressum accumulate the standard
# reliability and resolution; term1/term2 are the two within-bin extra
# components of Stephenson et al. (2008).
obar=sum(obs)/length(obs)
relsum=ressum=term1=term2=0
for (i in 1:nbins) {
if (nk[i] > 0) {
relsum=relsum+nk[i]*(fkbar[i]-okbar[i])^2
ressum=ressum+nk[i]*(okbar[i]-obar)^2
for (j in 1:nk[i]) {
term1=term1+(pred[bins[[i]][[1]][j]]-fkbar[i])^2
term2=term2+(pred[bins[[i]][[1]][j]]-fkbar[i])*(obs[bins[[i]][[1]][j]]-okbar[i])
}
}
}
rel=relsum/n
res=ressum/n
unc=obar*(1-obar)
bs=sum((pred-obs)^2)/n
bs_check_res=rel-res+unc
bss_res=(res-rel)/unc
gres=res-term1*(1/n)+term2*(2/n) # Generalized resolution
bs_check_gres=rel-gres+unc # BS using GRES
bss_gres=(gres-rel)/unc # BSS using GRES
#
# Estimating the bias-corrected components of the BS (Ferro and Fricker 2012).
# NOTE(review): for bins with nk[i] == 1, nk/(nk-1) is Inf; the result is
# NaN only when okbar is 0 or 1 there, and na.rm=T below drops NaN but not
# Inf — confirm single-member bins behave as intended.
term3=array(0,nbins)
for (i in 1:nbins) {
term3[i]=(nk[i]/(nk[i]-1))*okbar[i]*(1-okbar[i])
}
term_a=sum(term3,na.rm=T)/n
term_b=(obar*(1-obar))/(n-1)
rel_bias_corrected=rel-term_a
gres_bias_corrected=gres-term_a+term_b
# Clamp negative bias-corrected components while preserving their difference.
if (rel_bias_corrected < 0 || gres_bias_corrected < 0 ) {
rel_bias_corrected2=max(rel_bias_corrected,rel_bias_corrected-gres_bias_corrected,0)
gres_bias_corrected2=max(gres_bias_corrected,gres_bias_corrected-rel_bias_corrected,0)
rel_bias_corrected=rel_bias_corrected2
gres_bias_corrected=gres_bias_corrected2
}
unc_bias_corrected=unc+term_b
bss_bias_corrected=(gres_bias_corrected-rel_bias_corrected)/unc_bias_corrected
#
# Disabled self-consistency check: BS should equal both decompositions
# to within rounding (the body is intentionally empty).
if (round(bs,8)==round(bs_check_gres,8) & round(bs_check_gres,8)==round((rel_bias_corrected-gres_bias_corrected+unc_bias_corrected),8)) {
# cat("No error found \n")
# cat("BS = REL-GRES+UNC = REL_lessbias-GRES_lessbias+UNC_lessbias \n")
}
#
invisible(list(rel=rel,res=res,unc=unc,bs=bs,bs_check_res=bs_check_res,bss_res=bss_res,gres=gres,bs_check_gres=bs_check_gres,bss_gres=bss_gres,rel_bias_corrected=rel_bias_corrected,gres_bias_corrected=gres_bias_corrected,unc_bias_corrected=unc_bias_corrected,bss_bias_corrected=bss_bias_corrected,nk=nk,fkbar=fkbar,okbar=okbar,bins=bins,pred=pred,obs=obs))
#
} # end of if
#
}
CFU_plotmapmostliktercile <- function(lon,lat,ptc,
  tit='Probability forecast of most likely tercile (%)',file='G_PFC_map.ps') {
  #
  # Produce probabilistic forecast map of most likely tercile of a forecast.
  #
  # Description:
  #
  # Generates vector graphics file of a probabilistic forecast map showing the most likely tercile.
  # Based on an adapted version of Caio Coelho's EURO-BRISA function.
  #
  # Usage:
  #
  # CFU_plotmapmostliktercile(lon,lat,ptc,tit='Probability forecast of most likely tercile (%)',file='G_PFC_map.ps')
  #
  # Arguments:
  #
  # lon: A vector of longitudes.
  #
  # lat: A vector of latitudes.
  #
  # ptc: Array of forecast terciles information for each ensemble member, output of CFU_PB().
  #      Dimensions (members, 3 terciles, latitudes, longitudes).
  #
  # tit: Title of the Figure.
  #
  # file: Name of the postscript output file.
  #
  # Output:
  #
  # Vector graphics file of a probabilistic forecast map showing the most likely tercile.
  #
  # Author:
  #
  # Fabian Lienert <flienert@ic3.cat> 29 Oct 2012.
  #
  # Provides the plotting helpers used below (e.g. bluered()); assumed also to
  # make map()/map.axes() available -- TODO confirm what rclim.txt attaches.
  source('/cfu/pub/scripts/R/rclim.txt')
  nlon <- length(lon); nlat <- length(lat)
  # Number of ensemble members (first dimension of ptc).
  nfc <- dim(ptc)[1]
  # Count of grid columns NOT in (0,180); used below to rotate the longitude
  # vector when recentring the map.
  nlonr <- nlon-ceiling(length(lon[lon<180 & lon > 0]))
  # Probability of most likely tercile.
  # Sign convention: positive value = upper tercile most likely (value is its
  # ensemble probability), 0 = central tercile most likely, negative value =
  # lower tercile most likely. Ties (and all-NA points) are left as NA.
  PMLT <- (ptc[1,1,,]*NA)
  for (i in 1:nlon) {
    for (j in 1:nlat) {
      # Fraction of members in the lower / central / upper tercile.
      lo <- sum(ptc[,1,j,i],na.rm=T)/nfc
      mid <- sum(ptc[,2,j,i],na.rm=T)/nfc
      up <- sum(ptc[,3,j,i],na.rm=T)/nfc
      if (all(up > c(mid,lo))) {
        PMLT[j,i] <- up
      } else if (all(mid > c(up,lo))) {
        PMLT[j,i] <- 0
      } else if (all(lo > c(mid,up))) {
        PMLT[j,i] <- -lo
      }
    }
  }
  # EURO-BRISA figure. Modified Caio Coelho's function (local helper; only the
  # equi=TRUE path draws anything).
  plotmapmostliktercile <- function(lon,lat,data,maintit='',legtit='',equi=TRUE,bw=FALSE,
    cont=FALSE,reg=FALSE,...,lonlim=c(min(floor(lon)),max(ceiling(lon))),latlim=c(min(floor(lat)),max(ceiling(lat))),orientation=NULL,
    mapdat="world",xmaplim=c(min(floor(lon)),max(ceiling(lon))),ymaplim=c(min(floor(lat)),max(ceiling(lat))),
    longrds=NULL,latgrds=NULL,
    breaks=NULL,n=11,colours=NULL,
    roundleg=1,invert=FALSE) {
    rg<-range(data, na.rm=T)
    lowerlim <- rg[1]; upperlim <- rg[2]
    maximum <- max(abs(c(lowerlim,upperlim)))
    # Default breaks: symmetric about zero when the data contain negatives.
    if(is.null(breaks)){
      if(lowerlim<0) breaks <- seq(-maximum,maximum,(maximum-(-maximum))/n)
      else breaks <- seq(lowerlim,upperlim,(upperlim-(lowerlim))/n)
    }
    if(is.null(colours)){
      if(!bw){
        # Check if range includes negative values to use appropriate colour scale.
        if (rg[1] <0) {
          if (!invert) {
            colours <- bluered(seq(0,n-1,by=1), "linear", yellow =TRUE)
          } else {
            colours <- rev(bluered(seq(0,n-1,by=1), "linear", yellow =TRUE))
          }
        } else {
          colours <- bluered(seq(0,n-1,by=1), "linear", white=0, invert=T)
        }
      } else {
        # Grey-scale alternative for black-and-white output.
        if (rg[1] <0) {
          colours <- grey(seq(0, 1, length = length(breaks)-1))
          colours <- c(colours[1:((n-1)/2)],rev(colours[(((n-1)/2)+1):n]))
        } else {
          colours <- grey(seq(0, 1, length = length(breaks)-1))
        }
      }
    }
    if (equi) {
      # Two stacked panels: map on top (height 5), colour bar below (height 1).
      layout(matrix(1:2,ncol=1,nrow=2),heights=c(5,1))
      par(mar=c(6,2,6.5,1),cex.axis=1.0)
      if (reg) {
        layout(matrix(1:2,ncol=1,nrow=2),heights=c(5,1))
        par(mar=c(2,3,4,1),las=1,cex=1.4)
      }
      # Create probabilistic map.
      image(lon,lat,data,axes=F,col=colours,xlab='',ylab='',breaks=breaks)
      par(cex=2.1)
      if (cont) {contour(lon,lat,data,levels=round(breaks,1),labels=round(breaks,1),add=T,...)}
      # Check if lon is from 0 to 360 or -180 to 180 to use appropriate world map.
      if (min(lon)<0) {
        map('world',interior = F,add = T, lwd=2) # Low resolution world map (lon -180 to 180).
      } else {
        map('world2',interior = F,add = T, lwd=2) # Low resolution world map (lon 0 to 360).
      }
      box()
      map.axes()
      par(font.main=1,cex=1.4)
      title(maintit)
      # Adding colorbar.
      par(mar=c(4.5,2,0,1),mgp=c(1.5,0.3,0),las=1)
      if (reg) {
        par(mar=c(3,0.3,1.5,0.3),mgp=c(1.5,0.3,0),las=1)
      }
      image(c(1:n),1,t(t(c(1:n))),axes=F,col=colours,xlab='',ylab='')
      box()
      par(font.main=1,cex=1.3)
      if (maximum > 1) {
        # Percentage scale: these hard-coded labels assume the 9-class break
        # set passed by the outer call below (n=9).
        axis(1,at=seq(1,9),tick=F,pos=0.8,cex.axis=1.5,
          labels = c("85-100","70-85","55-70","40-55"," ","40-55","55-70","70-85","85-100"))
        axis(3,at=5,tick=F,cex.axis=1.5,pos=-1.3,labels=c("White=central tercile most likely"))
        axis(3,at=2.5,tick=F,cex.axis=1.5,pos=1.3,labels=c("Below normal"))
        axis(3,at=7.5,tick=F,cex.axis=1.5,pos=1.3,labels=c("Above normal"))
      } else {
        axis(1,at=seq(0.5, length(breaks)-0.5),tick=F,
          labels=round(breaks[1:(length(breaks))],roundleg))
      }
      # Redefine font size.
      par(cex=1.4)
      # Add the title.
      title(xlab = legtit,cex.lab=1.4)
    }
  }
  # Open the postscript device, draw the map with longitudes rotated/shifted
  # by -360 for the eastern block and latitudes reversed to match PMLT's row
  # order, probabilities scaled to percent, then close the device.
  postscript(paste(file,sep=''),paper="special",width=12,height=9.25,horizontal=F)
  plotmapmostliktercile(c(lon[c((nlonr+1):nlon)]-360,lon[1:nlonr]),rev(lat),t(PMLT[nlat:1,c((nlonr+1):nlon,1:nlonr)]*100),
    reg=T,maintit=tit,invert=F,breaks=c(-100,-85,-70,-55,-40,40,55,70,85,100),n=9)
  dev.off()
}
CFU_PB <- function(lon,lat,ano,fcavt=2:4,fcyr,fcsys=1,qt=3,ncpu=4) {
  #
  # Compute probabilistic bins (qt) of a forecast (fcyr) relative to the forecast climatology of the residual
  # forecasts excluding the selected forecast year (fcyr).
  # The maximal values of the lowest/highest quantiles is +/-10^+06 due to limitation in the function hist().
  # By default (qt=3) CFU_PB() computes terciles that can be plotted with CFU_plotmapmostliktercile().
  #
  # Description:
  #
  # Returns probabilistic bins of a forecast (fcyr) relative to the forecast climatology without selected forecast.
  #
  # Usage:
  #
  # CFU_PB(lon,lat,ano,fcavt=2:4,fcyr,fcsys=1,qt=3,ncpu=4)
  #
  # Arguments:
  #
  # lon: A vector of longitudes.
  #
  # lat: A vector of latitudes.
  #
  # ano: Array of forecast anomalies in CFU_load format.
  #
  # fcavt: A vector of time steps to average across.
  #
  # fcyr: Number of the forecast start year of the forecast.
  #
  # fcsys: Number of the forecast system.
  #
  # qt: Number of probabilistic bins 3: terciles, 4: quartiles, etc.
  #
  # ncpu: Number of CPUs to be used.
  #
  # Output:
  #
  # Probabilistic forecast information (1 or 0) at each grid point with
  # dimensions (number of ensemble members, qt, number of longitudes, latitudes).
  # Author:
  #
  # Fabian Lienert <flienert@ic3.cat> 29 Oct 2012.
  #
  library(doMC)
  registerDoMC(ncpu)
  # Probabilistic forecast.
  nlon <- length(lon); nlat <- length(lat)
  nyr <- dim(ano)[3]; nfc <- dim(ano)[2]; lt <- dim(ano)[4]
  # Bug fix: the bin dimension must follow the requested number of bins (qt);
  # it was hard-coded to 3, which crashed for any qt != 3.
  PTC <- array(NA, dim=c(nfc,qt,nlat,nlon))
  for (im in 1:nfc) {
    # Parallel over longitudes; each worker returns the bins for one column.
    XY.p <- foreach(i=1:nlon) %dopar% {
      PL <- array(NA, dim=c(qt,nlat))
      for (j in 1:nlat) {
        # Mean over forecast range for every year except the selected one.
        f.p <- apply(ano[fcsys,im,-fcyr,fcavt,j,i],c(1),mean)
        # Quantile thresholds of the model climatology without the selected
        # forecast. (A stray, undefined positional 'prob' argument was
        # removed from this call.)
        qum <- quantile(f.p,probs=c((1:(qt-1))/qt),na.rm=F,names=F,type=8)
        # Bin of the selected forecast.
        f.p <- mean(ano[fcsys,im,fcyr,fcavt,j,i])
        if (all(c(qum,f.p)==0)) {
          # Degenerate all-zero climatology: assign the central bin when one
          # exists. Bug fix: is.integer(qt/2) tests the storage type (always
          # FALSE for a double), so the original condition was always TRUE;
          # use modulo to detect an odd number of bins.
          f0 <- rep(0,qt)
          if (qt %% 2 == 1) { f0[median(1:qt)] <- 1 }
          PL[,j] <- f0
        } else {
          # 1/0 membership of the forecast in each bin; +/-1e6 are the outer
          # break limits required by hist().
          PL[,j] <- hist(f.p,breaks=c(-1e+06,qum,1e+06),plot=F)$counts
        }
      }
      PL
    }
    for (i in 1:nlon) { PTC[im,,,i] <- XY.p[[i]] }
  }
  PTC
}
CFU_MODE <- function(ano,eof,mode=1) {
  # Project anomaly fields onto a selected EOF mode to obtain its principal
  # component (PC) time series. Handles NAs in the anomalies.
  #
  # Arguments:
  #
  # ano:  anomalies from CFU_ano() with dimensions (forecast systems,
  #       ensemble members, start years, forecast months, latitudes, longitudes).
  # eof:  EOF object returned by CFU_EOF().
  # mode: mode number in the EOF object onto which to project.
  #
  # Output:
  #
  # PC.ver:    PCs in verification format (systems, members, start years, months).
  # PC.ver.em: ensemble-mean PCs (systems, start years, months), kept only for
  #            backward compatibility (it equals the member mean of PC.ver).
  #
  # Authors: Fabian Lienert (2012); bug fixes and convention changes by
  # Lauriane Batte and Virginie Guemas (2014).
  #
  # Convention note: CFU_EOF stores EOFs divided by the area weights so that
  # PC * EOF reconstructs the original (unweighted) field. For the projection
  # the pattern is therefore multiplied back by the weights, and the anomalies
  # are weighted as well, so that the scalar product is consistent.
  nsys <- dim(ano)[1]; nmemb <- dim(ano)[2]
  nsdates <- dim(ano)[3]; nleads <- dim(ano)[4]
  nlats <- dim(ano)[5]; nlons <- dim(ano)[6]
  # Consistency checks between the EOF object and the input field.
  if ( dim(eof$EOFs)[2] != nlats ) {
    stop("Inconsistent number of latitudes between eof and input field")
  }
  if ( dim(eof$EOFs)[3] != nlons ) {
    stop("Inconsistent number of longitudes between eof and input field")
  }
  # Selected EOF pattern, multiplied back by the weights (see note above).
  pattern <- eof$EOFs[mode,,]*eof$wght
  # Weight the anomalies accordingly before projecting.
  ano <- ano*InsertDim(InsertDim(InsertDim(InsertDim(eof$wght,1,nsys),2,nmemb),3,nsdates),4,nleads)
  pc.ver <- array(NA,dim=c(nsys,nmemb,nsdates,nleads))
  for (isys in 1:nsys) {
    for (imemb in 1:nmemb) {
      for (isdate in 1:nsdates) {
        for (ilead in 1:nleads) {
          field <- ano[isys,imemb,isdate,ilead,,]
          # All-missing maps stay NA; otherwise project via a scalar product.
          if (!all(is.na(field))) {
            pc.ver[isys,imemb,isdate,ilead] <- sum(field*pattern,na.rm=TRUE)
          }
        }
      }
    }
  }
  # The PC of the ensemble-mean anomaly equals the ensemble mean of the member
  # PCs, so a plain mean over the member dimension suffices here.
  pc.ver.em <- Mean1Dim(pc.ver,2)
  return(list(PC.ver=pc.ver,PC.ver.em=pc.ver.em))
}
CFU_DTF <- function(lon,lat,ano,gmt,ncpu=4) {
  # Linearly regress the forecast global-mean-temperature (GMT) trend out of
  # forecast monthly anomalies, separately for each grid point, lead time and
  # forecast system. NAs are excluded from the fit but preserved in the output.
  #
  # Arguments:
  #
  # lon, lat: vectors of longitudes / latitudes.
  # ano:  forecast anomalies from CFU_ano() with dimensions (forecast systems,
  #       ensemble members, start years, forecast months, latitudes, longitudes).
  # gmt:  forecast GMT anomalies with dimensions (forecast systems,
  #       ensemble members, start years, forecast months).
  # ncpu: number of CPUs used for the parallel longitude loop.
  #
  # Output:
  #
  # ano.dt.ver: detrended forecast anomalies, same dimensions as 'ano'.
  # reg:        maps of regression slopes (systems, forecast months, lat, lon).
  #
  # Author: Fabian Lienert <flienert@ic3.cat> 16 Nov 2012.
  library(doMC)
  registerDoMC(ncpu)
  nlons <- length(lon); nlats <- length(lat)
  nsdates <- dim(ano)[3]; nleads <- dim(ano)[4]
  nmemb <- dim(ano)[2]; nsys <- dim(ano)[1]
  ano.dt.ver <- array(NA,dim=c(nsys,nmemb,nsdates,nleads,nlats,nlons))
  # Maps of regression slopes.
  r.XY <- array(NA,dim=c(nsys,nleads,nlats,nlons))
  for (isys in 1:nsys) {
    # Parallelize over longitudes; each worker returns the residual anomalies
    # and the regression slopes for one longitude column.
    XY.p <- foreach(i=1:nlons, .verbose=F) %dopar% {
      resid.col <- array(NA,dim=c(nmemb,nsdates,nleads,nlats))
      slope.col <- array(NA,dim=c(nleads,nlats))
      for (j in 1:nlats) {
        for (ilead in 1:nleads) {
          # Forecast anomalies (members x start years) flattened for the fit.
          y <- as.vector(ano[isys,,,ilead,j,i])
          # Centred GMT predictor for this lead time across all forecasts.
          x <- as.vector(gmt[isys,,,ilead])
          x <- x-mean(x)
          if (!all(is.na(y))) {
            # Regress the anomaly onto the trend and keep the residuals;
            # na.exclude keeps NAs in place in the residual series.
            fit <- lm(y~x,na.action=na.exclude)
            res <- residuals(fit)
            dim(res) <- c(nmemb,nsdates)
            resid.col[,,ilead,j] <- res
            # Regression coefficient map.
            slope.col[ilead,j] <- coefficients(fit)[[2]]
          }
        }
      }
      return(list(DFC=resid.col,REG=slope.col))
    }
    for (i in 1:nlons) {
      ano.dt.ver[isys,,,,,i] <- XY.p[[i]]$DFC
      r.XY[isys,,,i] <- XY.p[[i]]$REG
    }
  }
  gc()
  return(list(ano.dt.ver=ano.dt.ver,reg=r.XY))
}
CFU_DTO <- function(lon,lat,ano,yr1,yr2,yrint,gmt=F) {
  #
  # Linearly regress out global mean temperature trend (GMT) from observed monthly anomalies
  # that coincide with hindcasts started on November 1st every 1,2,3, etc. years.
  # The anomalies are selected from the first year that coincide with each hindcast.
  # NAs are excluded from the linear model fit, but kept in the output.
  #
  # Description:
  #
  # Returns linearly detrended and original anomalies in a format to be used for CFU_EOF().
  #
  # Usage:
  #
  # CFU_DTO(lon,lat,ano,yr1,yr2,yrint,gmt=F)
  #
  # Arguments:
  #
  # lon: A vector of longitudes.
  #
  # lat: A vector of latitudes.
  #
  # ano: Array of anomalies from CFU_ano() with dimensions
  #      (1, 1, number of start years, forecast months, latitudes, longitudes).
  #
  # yr1: First start year.
  #
  # yr2: Last start year.
  #
  # yrint: Interval between hindcasts in years.
  #
  # gmt: Global mean temperature time series. (optional; when left as FALSE it
  #      is read from the HadCRUT file below)
  #
  # Output:
  #
  # ano.dt.ver: An array of the linearly detrended anomalies in CFU verification format.
  #
  # ano.dt: An array of the linearly detrended anomalies with dimensions
  #         (number of months, latitudes, longitudes).
  #
  # ano: An array of the original input anomalies with dimensions
  #      (number of months, latitudes, longitudes).
  #
  # reg: An array containing a map of regression coefficients
  #      (number of latitudes, longitudes).
  #
  # gmt: The GMT series actually used.
  #
  # tgl.obs: The full observed GMT series when read from file, NULL otherwise.
  #
  # Author:
  #
  # Fabian Lienert <flienert@ic3.cat> 19 Apr 2013.
  #
  nlon <- length(lon); nlat <- length(lat)
  nyr <- dim(ano)[3]; lt <- dim(ano)[4]
  # Bug fix: 'tgl.obs' is returned unconditionally below but used to be
  # created only inside the if-branch, so any call supplying 'gmt' failed at
  # the return statement with "object 'tgl.obs' not found".
  tgl.obs <- NULL
  # Bug fix: test the logical sentinel explicitly; 'all(gmt==F)' would also
  # trigger for a user-supplied all-zero numeric GMT series.
  if (is.logical(gmt) && all(gmt==FALSE)) {
    # Global mean temperature GMT from HadCRUT, climate explorer.
    # http://climexp.knmi.nl/data/icrutem3_hadsst2_0-360E_-90-90N_n.dat
    T1 <- read.table("/cfu/data/ukmo/hadcrut/icrutem3_hadsst2_0-360E_-90-90N_n.dat",row.names=1)
    XT <- array(NA,dim=c(12,113))
    for (im in 1:12) {
      XT[im,] <- T1[[im]]
    }
    XT[XT==-9.999000e+02] <- NA
    # GMT anomaly wrt 1356 mo, 113 yr.
    tgl.obs <- ts(c(as.vector(XT),rep(NA,120)),deltat=1/12,start=c(1900,1),end=c(2022,12))
    # GMT anomaly wrt verification period.
    tgl.vp.nov.obs <- window(tgl.obs,start=c(yr1,11),end=c((yr2+lt/12),10))
    tgl.vp.nov.obs <- tgl.vp.nov.obs*((yr2-yr1+lt/12)/113)
    gmt <- tgl.vp.nov.obs
  }
  # Continuous observed series: the first yrint years of every hindcast,
  # completed by the remaining months of the last hindcast. Latitudes are
  # reversed relative to the input.
  o.XY <- array(NA,dim=c(((yr2-yr1+lt/12)*12),nlat,nlon))
  TP1 <- ano[1,1,,1:(yrint*12),nlat:1,]
  TP1 <- aperm(TP1, c(2,1,3,4))
  dim(TP1) <- c(nyr*yrint*12,nlat,nlon)
  o.XY[1:(nyr*yrint*12),,] <- TP1
  o.XY[((nyr*yrint*12)+1):((yr2-yr1+lt/12)*12),,] <- ano[1,1,nyr,((yrint*12)+1):lt,nlat:1,]
  if (length(gmt)!=((yr2-yr1+lt/12)*12)) {
    # Fail with the offending lengths in the error message (previously a
    # print() followed by a bare stop()).
    stop(paste('GMT time series has not the same length than the array of anomalies.',length(gmt),((yr2-yr1+lt/12)*12)))
  }
  # Remove trend from the obs for the whole period.
  o.XY.dt <- array(NA,dim=c(((yr2-yr1+lt/12)*12),nlat,nlon))
  r.XY <- (o.XY.dt[1,,]*NA)
  for (i in 1:nlon) {
    for (j in 1:nlat) {
      if (!is.na(o.XY[1,j,i])) {
        # Regress anomaly onto GMT and remove it from the anomaly.
        a1 <- o.XY[,j,i]
        # Exclude NAs from linear model estimate, but keep them in the residual time series. (!=$residuals)
        fit <- lm(a1~as.vector(gmt),na.action=na.exclude)
        a1.dt <- residuals(fit)
        o.XY.dt[,j,i] <- a1.dt
        # Regression coefficient map.
        r.XY[j,i] <- coefficients(fit)[[2]]
      }
    }
  }
  # Put detrended obs in verification format.
  ano.dt.ver <- array(NA,dim=c(1,1,nyr,lt,nlat,nlon))
  # Works for forecast start date in each year: select one lt-month window
  # (Nov to Oct) per start date and undo the latitude reversal.
  for (i in 1:nlon) {
    for (j in 1:nlat) {
      d.all <- ts(o.XY.dt[,j,i],deltat=1/12,start=c(yr1,11),end=c((yr2+lt/12),10))
      if (!is.na(o.XY[1,j,i])) {
        for (iy in 1:nyr) {
          d.sl <- window(d.all,start=c((yr1+iy*yrint-yrint),11),end=c((yr1+lt/12+iy*yrint-yrint),10))
          ano.dt.ver[1,1,iy,,((nlat+1)-j),i] <- d.sl
        }
      }
    }
  }
  return(list(ano.dt=o.XY.dt,ano=o.XY,reg=r.XY,ano.dt.ver=ano.dt.ver,gmt=gmt,tgl.obs=tgl.obs))
}
CFU_DTO_an <- function(lon,lat,ano,yr1,yr2,yrint,gmt=F) {
  # Regress the global mean temperature (GMT) trend out of observed ANNUAL
  # anomalies matching hindcasts started on November 1st every 'yrint' years.
  # The anomalies are taken from the first year coinciding with each hindcast;
  # NAs are excluded from the linear fit but kept in the output series.
  #
  # Arguments:
  #
  # lon, lat: vectors of longitudes / latitudes.
  # ano:   anomalies from CFU_ano() with dimensions
  #        (1, 1, start years, forecast years, latitudes, longitudes).
  # yr1, yr2: first / last start year.
  # yrint: interval between hindcast start dates in years.
  # gmt:   optional GMT time series; when left as FALSE it is read from the
  #        HadCRUT file and aggregated to annual means.
  #
  # Output (list):
  #
  # ano.dt:     detrended anomalies (years, latitudes, longitudes).
  # ano:        original anomalies in the same layout.
  # reg:        map of regression coefficients (latitudes, longitudes).
  # ano.dt.ver: detrended anomalies in CFU verification format.
  # gmt:        GMT series actually used.
  #
  # Author: Fabian Lienert <flienert@ic3.cat> 15 May 2013.
  nlons <- length(lon); nlats <- length(lat)
  nsdates <- dim(ano)[3]; nleads <- dim(ano)[4]
  # Total number of years covered by the stitched observed series.
  ntimes <- yr2-yr1+nleads
  if (all(gmt==FALSE)) {
    # Global mean temperature GMT from HadCRUT, climate explorer.
    # http://climexp.knmi.nl/data/icrutem3_hadsst2_0-360E_-90-90N_n.dat
    had <- read.table("/cfu/data/ukmo/hadcrut/icrutem3_hadsst2_0-360E_-90-90N_n.dat",row.names=1)
    monthly <- array(NA,dim=c(12,113))
    for (imonth in 1:12) {
      monthly[imonth,] <- had[[imonth]]
    }
    monthly[monthly==-9.999000e+02] <- NA
    # Monthly GMT series (1356 months over 113 years), NA-padded to Dec 2022.
    tgl.obs <- ts(c(as.vector(monthly),rep(NA,120)),deltat=1/12,start=c(1900,1),end=c(2022,12))
    # Restrict to the verification period, rescale, aggregate to annual means.
    tgl.vp.nov.obs <- window(tgl.obs,start=c(yr1,11),end=c(((yr2+nleads)),10))
    tgl.vp.nov.obs <- tgl.vp.nov.obs*((yr2-yr1+nleads)/113)
    gmt <- aggregate(tgl.vp.nov.obs,FUN=mean)
  }
  # Continuous observed series: first 'yrint' forecast years of every
  # hindcast, completed with the remaining years of the last hindcast.
  # Latitudes are reversed relative to the input.
  obs.series <- array(NA,dim=c(ntimes,nlats,nlons))
  if (yrint>1) {
    first.years <- ano[1,1,,1:(yrint),nlats:1,]
    first.years <- aperm(first.years, c(2,1,3,4))
  } else {
    first.years <- ano[1,1,,1,nlats:1,]
  }
  dim(first.years) <- c(nsdates*yrint,nlats,nlons)
  obs.series[1:(nsdates*yrint),,] <- first.years
  obs.series[((nsdates*yrint)+1):ntimes,,] <- ano[1,1,nsdates,((yrint)+1):nleads,nlats:1,]
  if (length(gmt)!=ntimes) {
    print(paste('GMT time series has not the same length than the array of anomalies.',length(gmt),ntimes))
    stop()
  }
  # Remove the GMT trend at every grid point over the whole period.
  obs.dt <- array(NA,dim=c(ntimes,nlats,nlons))
  slopes <- (obs.dt[1,,]*NA)
  for (i in 1:nlons) {
    for (j in 1:nlats) {
      if (!is.na(obs.series[1,j,i])) {
        serie <- obs.series[,j,i]
        # na.exclude keeps NAs in the residual series (unlike fit$residuals).
        fit <- lm(serie~as.vector(gmt),na.action=na.exclude)
        obs.dt[,j,i] <- residuals(fit)
        # Regression coefficient map.
        slopes[j,i] <- coefficients(fit)[[2]]
      }
    }
  }
  # Back to verification format: one nleads-year window per start date,
  # undoing the latitude reversal applied above.
  ano.dt.ver <- array(NA,dim=c(1,1,nsdates,nleads,nlats,nlons))
  for (i in 1:nlons) {
    for (j in 1:nlats) {
      full.ts <- ts(obs.dt[,j,i],deltat=1,start=c(yr1,11),end=c((yr2+nleads),10))
      if (!is.na(obs.series[1,j,i])) {
        for (iy in 1:nsdates) {
          chunk <- window(full.ts,start=c((yr1+iy*yrint-yrint),11),end=c((yr1+nleads+iy*yrint-yrint),10))
          ano.dt.ver[1,1,iy,,((nlats+1)-j),i] <- chunk
        }
      }
    }
  }
  return(list(ano.dt=obs.dt,ano=obs.series,reg=slopes,ano.dt.ver=ano.dt.ver,gmt=gmt))
}
CFU_EOF <- function(lon,lat,ano,neofs=15,corr=FALSE) {
  #
  # Description:
  #
  # Performs an area-weighted EOF analysis using SVD based on a covariance matrix by default,
  # based on a correlation matrix if corr argument is set to TRUE.
  #
  # Arguments:
  #
  # lon: A vector of longitudes.
  #
  # lat: A vector of latitudes.
  #
  # ano: Array of anomalies with dimensions
  #      (number of timesteps, number of latitudes, number of longitudes).
  #
  # neofs: Number of modes to be kept. Default = 15
  #
  # corr: based on correlation matrix rather than covariance matrix. Default = FALSE
  #
  # Output:
  #
  # EOFs: An array of EOF patterns normalized to 1 with dimensions
  #       (number of modes, number of latitudes, number of longitudes)
  #
  # PCs: An array of principal components with dimensions
  #      (number of timesteps, number of modes)
  #
  # Var: Percentage (%) of variance fraction of total variance explained by each mode (number of modes).
  #
  # mask: Mask with dimensions (number of latitudes, number of longitudes)
  #
  # wght: Weights with dimensions (number of latitudes, number of longitudes)
  #
  # Author:
  #
  # Fabian Lienert <flienert@ic3.cat> 29 Oct 2012. Inspired by R. Benestad's EOF() in R-package clim.pact.
  #
  # Lauriane Batte <lauriane.batte@ic3.cat> March 2014. Bug-fixes : 1-reversion of latitudes in the weights
  #                                                                 2-correlation matrix was used instead of covariance
  #                                                                 3-double use of the weights
  #
  # Virginie Guemas <virginie.guemas@ic3.cat> 17 March 2014. Bug-fixes: 1-weight computation - division by sum of cos(lat)
  #                                                                     2-shuffling of EOFs in EOF.2 intermediate vector
  #                                                                     3-crash when neofs=1 sorted out
  #                                                                     4-crash when neofs>nt sorted out
  #
  # Lauriane Batte <lauriane.batte@ic3.cat> 19 March 2014 : BIG cleanup of code and clarification
  #                                                         Reduction of the number of transpositions and bug-fixes associated
  #                                                         Remove of the obsolete LINPACK options
  #
  # Virginie Guemas <virginie.guemas@ic3.cat> 11 April 2014.
  #     1 - Bug-fix in dimensions handling
  #         EOF composition restitutes now the original field in all cases
  #     2 - Simplification of the convention transpose
  #     3 - Options to use the correlation matrix rather than the covariance matrix
  #     4 - Security checks
  #     5 - New normalization of PCs so that PC*EOF only reconstruct the original file
  #     6 - Weights = sqrt(cos(lat)) for ano so that covariance matrice weighted by cos(lat)
  #     7 - Division of EOF by weights so that the reconstruction is simply EOF * PC
  #
  library(s2dverification)
  nlon <- length(lon); nlat <- length(lat)
  # The two lines below have been moved up for the security checks. Virginie
  dim.dat <- dim(ano)
  ny <- dim.dat[2]; nx <- dim.dat[3]; nt <- dim.dat[1]
  # Security check. Virginie
  if ( ny != nlat ) {
    stop("Inconsistent number of latitudes and input field dimensions")
  }
  if ( nx != nlon ) {
    stop("Inconsistent number of longitudes and input field dimensions")
  }
  # Buildup of the mask: 1 where the first timestep is finite, NA elsewhere. Fabian.
  mask <- ano[1,,]; mask[!is.finite(mask)] <- NA
  mask[is.finite(mask)] <- 1
  # Replace mask of NAs with 0s for EOF analysis. Fabian
  ano[!is.finite(ano)] <- 0
  # Area weighting. Weights for EOF. Fabian. Bug below. Lauriane
  # wght <- array(cos(rev(lats)*pi/180),dim=(c(nlat,nlon)))
  # The reversion of latitudes is not suitable. Lauriane
  wght <- array(cos(lat*pi/180),dim=(c(nlat,nlon)))
  # The sum of the weights should equal to 1. Virginie
  wght <- wght/sum(wght,na.rm=T)
  # We want the covariance matrix to be weigthed by the grid cell area
  # so the anomaly field should be weighted by its square root since
  # the covariance matrix equals transpose(ano) times ano. Virginie.
  wght <- sqrt(wght)
  # Fabian was initially dividing by stdv to get the correlation matrix.
  # Weighting of the anomalies that corresponds to the use of a covariance matrix. Lauriane.
  #for (it in 1:nt) {ano[it,,] <- ano[it,,]*wght/stdv}
  ano <- ano*InsertDim(wght,1,nt)
  # The use of the correlation matrix is done under the option corr. Virginie.
  # NOTE(review): sd() here is computed over the whole weighted array, i.e. a
  # single overall standard deviation, not a per-gridpoint one -- confirm this
  # is the intended normalization for the correlation option.
  if (corr==TRUE) {
    stdv <- sd(ano,na.rm=T)
    ano <- ano/InsertDim(stdv,1,nt)
  }
  # Time/space matrix for SVD. Fabian
  dim(ano) <- c(nt,ny*nx)
  dim.dat <- dim(ano)
  # "transposed" means already transposed. Fabian's convention.
  # This is very misleading because the user has actually not transposed its array.
  # Only by chance, the spatial dimension is smaller than the time dimension.
  # I therefore change the convention to:
  # "transpose" means the array needs to be transposed before calling La.svd for
  # computational efficiency because the spatial dimension is larger than the time
  # dimension. This goes with transposing the outputs of LA.svd also. Virginie
  #print(c('dim (nt,npts):',dim.dat))
  #if (dim.dat[2] < dim.dat[1]) {transposed <- TRUE} else {transposed <- FALSE}
  if (dim.dat[2] > dim.dat[1]) {transpose <- TRUE} else {transpose <- FALSE}
  if (transpose) {
    pca <- La.svd(t(ano))
  } else {
    pca <- La.svd(ano)
  }
  # Remove of the obsolete LINPACK cases. Lauriane.
  # The number of output eofs might be lower than 15 if less than 15 grid points are used. Fabian.
  #if (neofs > dim(ano)[2]) {neofs <- dim(ano)[2]}
  # Whatever the spatial or temporal dimension is smaller than neofs, neofs should be bounded. Virginie.
  neofs=min(dim(ano),neofs)
  # The line below by Fabian is not used. Virginie
  #dim.v <- dim(pca$v); dim.u <- dim(pca$v); dim.x <- dim(ano)
  #
  # Lauriane :
  # La.svd conventions: decomposition X = U D t(V)
  # La.svd$u returns U
  # La.svd$d returns diagonal values of D
  # La.svd$v returns t(V) !!
  # The usual convention is PC=U and EOF=V.
  # If La.svd is called for ano (transpose=FALSE case):
  #    EOFs: $v
  #    PCs:  $u
  # If La.svd is called for t(ano) (transposed=TRUE case):
  #    EOFs: t($u)
  #    PCs:  t($v)
  if (transpose) {
    pca.EOFs <- t(pca$u)
    pca.PCs <- t(pca$v)
    #print('transpose')
    #print(c('Dim(pca.EOFs) :',dim(pca.EOFs)))
    #print(c('Dim(pca.PCs) :',dim(pca.PCs)))
  } else {
    pca.EOFs <- pca$v
    pca.PCs <- pca$u
    #print('not transpose')
    #print(c('Dim(pca.EOFs) :',dim(pca.EOFs)))
    #print(c('Dim(pca.PCs) :',dim(pca.PCs)))
  }
  # The numbers of transposition has been reduced. Virginie
  #PC <- pca.PCs[1:neofs,]
  PC <- pca.PCs[,1:neofs]
  EOF <- pca.EOFs[1:neofs,]
  # The lines seem to be at the origin of the lack of recomposition in last version. Virginie.
  #dim(EOF) <- c(neofs,nx,ny)
  #EOF <- aperm(EOF, c(1,3,2))
  dim(EOF) <- c(neofs,ny,nx)
  # To sort out crash when neofs=1 (PC collapses to a vector above). Virginie
  if (neofs==1) {
    PC=InsertDim(PC,2,1)
    #EOF=InsertDim(EOF,1,1) # This is not necessary with line dim(EOF) above. Lauriane
  }
  # Computation of the % of variance associated with each mode. Fabian.
  # The squared singular values are proportional to the variance per mode.
  W <- pca$d[1:neofs]
  tot.var <- sum(pca$d^2)
  Var.eof <- 100 * pca$d[1:neofs]^2/tot.var
  for (e in 1:neofs) {
    # sg is initialized here. It detects the sign of the pattern and latter the pattern is
    # multiply by -1 if necessary to obtain the nominal IPO or PDO. Fabian
    # CFU_EOF should be generic enough to compute any type of EOF. The change of sign of
    # the pattern should be done outside CFU_EOF. Lauriane & Virginie.
    #sg <- 1
    # Factor to normalize the PC.
    # NOTE(review): eof.pc.sd is computed but no longer used since the PC
    # normalization below was changed -- kept for reference.
    eof.pc.sd <- sd(PC[,e])
    # Factor to normalize the EOF (masked to the valid-data region).
    eof.patt.nn <- EOF[e,,]*mask
    # Weights have already been taken into account in covariance matrix. Lauriane.
    #eof.patt.ms <- sum(eof.patt.nn^2*t(wght*mask),na.rm=TRUE)/sum(t(wght*mask),na.rm=TRUE)
    eof.patt.ms <- sum(eof.patt.nn^2,na.rm=TRUE)
    # Normalize PC and EOF
    eof.patt <- eof.patt.nn/eof.patt.ms
    # I would rather multiply eof.pc by eof.patt.ms and W[e] so that the user can reconstruct
    # the original field by EOF * PC only (*weight) rather than EOF * PC * (normalization
    # factor that the user does not know) * (eigenvalue that is not straightforward to get back
    # from the % of variance explained) (*weight). The division by sd(pc) is very easy to do
    # outside CFU_EOF in case the user needs a normalized PC but it is more difficult to get
    # back the amplitude of the variability represented by a given mode outside CFU_EOF if
    # we do not output such a muliplied PC multiplied. Virginie.
    #eof.pc <- PC[,e]/eof.pc.sd
    eof.pc <- PC[,e]*eof.patt.ms*W[e]
    # I am also wondering if we should not output the EOF divided by the weight
    # so that the reconstruction is only EOF * PC only rather than EOF * PC * weight
    # since we have multiplied ano by weight. Virginie
    eof.patt <- eof.patt/wght
    # Change the sign of first EOF pattern for PDO, IPO, etc. Fabian
    #if (e==1) if (eof.patt[round(nlon/2),round(nlat/2)] > 0) sg <- -1
    EOF[e,,] <- eof.patt
    PC[,e] <- eof.pc
  }
  return(list(EOFs=EOF,PCs=PC,Var=Var.eof,mask=mask,wght=wght))
}
CFU_plotbox <- function(NAO, yr1, yr2, tit='NAO index', tar='DJF',
                        pr.qu=expression('NAO index (PC1) SLP'~(sigma)),
                        obs='ERAInterim', fcsys='EC-Earth 3', file='NAO_fc.ps') {
  ##
  ## Produce a time series plot showing the distribution of the forecast
  ## members with a box-and-whisker plot vs. the observed evolution.
  ##
  ## Description:
  ##
  ## Generates a vector graphics (postscript) file of a forecast
  ## box-and-whisker plot time series against the observed evolution.
  ## Only works for re-forecasts started each year.
  ##
  ## Usage:
  ##
  ## CFU_plotbox(NAO, yr1, yr2, tit='NAO index', tar='DJF',
  ##             pr.qu=expression('NAO index (PC1) SLP'~(sigma)),
  ##             obs='ERAInterim', fcsys='EC-Earth 3', file='NAO_fc.ps')
  ##
  ## Arguments:
  ##
  ## NAO: List with observed (NAOO.ver, dims (1,1,years)) and forecast
  ##      (NAOF.ver, dims (1,members,years)) index arrays, e.g. the NAO
  ##      index for each ensemble member, output of CFU_NAO().
  ## yr1: Year of first forecast target period.
  ## yr2: Year of last forecast target period.
  ## tit: Title of the figure.
  ## tar: Forecast target period (x-axis label / title suffix).
  ## pr.qu: Prognostic quantity (y-axis label).
  ## obs: Observational dataset name (legend entry).
  ## fcsys: Climate forecast system name (legend entry).
  ## file: Name of the postscript output file.
  ##
  ## Output:
  ##
  ## Vector graphics file of a forecast box-and-whisker plot time series
  ## vs. the observed evolution. Invisibly returns NULL (called for its
  ## side effect of writing 'file').
  ##
  ## Author:
  ##
  ## Fabian Lienert <flienert@ic3.cat> 09 Aug 2013.
  ##
  ## Modifications:
  ##
  ## Lauriane Batte <lauriane.batte@ic3.cat> 18 Mar 2014
  ## 1. No normalization of the indices (already taken care of
  ##    in previous functions)
  ## 2. Taking into account NA cases (missing data)
  ##
  nyr <- dim(NAO$NAOO.ver)[3]
  postscript(file, horizontal=TRUE)
  ## Observed time series; assumes one start per year (yr2 - yr1 + 1 == nyr).
  pc.o <- ts(NAO$NAOO.ver[1,1,], deltat=1, start=yr1, end=yr2)
  ## Normalization of obs and forecast members has already been done in
  ## CFU_MODE, so the indices are plotted as provided.
  par(mar = c(5, 6, 4, 2))
  ## Symmetric y-range around zero, covering both members and observations.
  ymax <- max(abs(c(NAO$NAOF.ver[1,,], pc.o)), na.rm=TRUE)
  boxplot(NAO$NAOF.ver[1,,], add=FALSE, main=paste(tit, tar), ylab='', xlab='',
          col='red', lwd=2, t='b', axes=FALSE, cex.main=2,
          ylim=c(-ymax, ymax))
  lines(seq_len(nyr), pc.o, lwd=3, col='blue')
  abline(h=0, lty=1)
  legend('bottomleft', c(obs, fcsys), lty=c(1,1), lwd=c(3,3), pch=c(NA,NA),
         col=c('blue','red'), horiz=TRUE, bty='n', cex=2.2)
  axis(1, c(1:nyr), NA, cex.axis=2.0)
  axis(1, seq(1, nyr, by=1), seq(start(pc.o)[1], end(pc.o)[1], by=1),
       cex.axis=2.0)
  mtext(1, line=3, text=tar, cex=1.9)
  ## Anomaly correlation of obs vs. the ensemble mean. Pairwise-complete
  ## observations keep the AC defined when some years are missing (NA);
  ## a plain cor() would print 'AC = NA' in that case.
  ens.mean <- apply(NAO$NAOF.ver[1,,], c(2), mean, na.rm=TRUE)
  mtext(3, line=-2,
        text=paste(' AC =', round(cor(pc.o, ens.mean,
                                      use='pairwise.complete.obs'), 2)),
        cex=1.9, adj=0)
  axis(2, cex.axis=2.0)
  mtext(2, line=3, text=pr.qu, cex=1.9)
  box()
  dev.off()
}
CFU_NAO <- function(lon, lat, ano.f, ano.o, fcavt=2:4, fcsys=1) {
  ##
  ## Compute the North Atlantic Oscillation (NAO) index based on the leading
  ## EOF of sea level pressure (SLP) anomalies.
  ## The PCs are obtained by projecting the forecast and observed anomalies
  ## onto the observed EOF pattern.
  ## By default (fcavt=2:4) CFU_NAO() computes the NAO index for 1-month lead
  ## seasonal forecasts that can be plotted with CFU_plotbox().
  ##
  ## Description:
  ##
  ## Returns cross-validated PCs of the NAO index for forecast (ano.f) and
  ## observations (ano.o) based on the observed leading EOF pattern: for each
  ## start year, the observed EOF is recomputed with that year excluded and
  ## both forecast and observed anomalies are projected onto it.
  ##
  ## Usage:
  ##
  ## CFU_NAO(lon,lat,ano.f,ano.o,fcavt=2:4,fcsys=1)
  ##
  ## Arguments:
  ##
  ## lon: A vector of longitudes.
  ## lat: A vector of latitudes.
  ## ano.f: Array of North Atlantic SLP (0-60N, 80W-0) forecast anomalies
  ##        from CFU_ano() with dimensions (number of forecast systems,
  ##        ensemble members, start years, forecast months, latitudes,
  ##        longitudes).
  ## ano.o: Array of North Atlantic SLP (0-60N, 80W-0) observed anomalies
  ##        from CFU_ano() with the same dimension ordering as ano.f
  ##        (first two dimensions of size 1).
  ## fcavt: A vector of forecast time steps to average across, defining the
  ##        target period.
  ##        NOTE(review): length(fcavt) must be >= 2, otherwise the
  ##        subsetting below drops the forecast-month dimension and apply()
  ##        fails — confirm with callers.
  ## fcsys: Number of the forecast system.
  ##        NOTE(review): not used by the active code (the first system is
  ##        always taken, see the commented rotation block); kept for
  ##        interface compatibility.
  ##
  ## Output:
  ##
  ## NAOF.ver: Array of forecast NAO index in verification format with
  ##           dimensions (1, ensemble members, start years).
  ##
  ## NAOO.ver: Array of observed NAO index in verification format with
  ##           dimensions (1, 1, start years).
  ##
  ## OEOF: Observed leading-EOF decomposition of the LAST cross-validation
  ##       iteration (all start years except the last one).
  ##
  ## Author:
  ##
  ## Fabian Lienert <flienert@ic3.cat> 08 Aug 2013.
  ##
  ## Lauriane Batte <lauriane.batte@ic3.cat> March 2013.
  ##
  ## Virginie Guemas <virginie.guemas@ic3.cat> 17 March 2013.
  ## Removing the rotation.
  ##
  nlon <- length(lon)
  nlat <- length(lat)
  nyr <- dim(ano.f)[3]
  nfc <- dim(ano.f)[2]
  lt <- dim(ano.f)[4]
  # A security check should be added nlon==nx & nlat==ny. Virginie.
  # Number of longitudes in [0,180); only needed if the rotation below is
  # re-enabled.
  nlonr <- ceiling(length(lon[lon < 180 & lon >= 0]))
  ## Rotate field to start west of 0 longitude. Fabian.
  # This rotation should be done after the projection to output the
  # NAO pattern only. Virginie.
  #ano.o.sh <- (ano.o*NA)
  #ano.o.sh[1,1,,,,] <- ano.o[1,1,,,,c((nlonr+1):nlon,1:nlonr)]
  #ano.f.sh <- (ano.f*NA)
  #ano.f.sh[1,,,,,] <- ano.f[fcsys,,,,,c((nlonr+1):nlon,1:nlonr)]
  #remove(ano.o,ano.f);
  ano.f.sh <- ano.f
  ano.o.sh <- ano.o
  gc()
  ## Target period mean over the time steps in fcavt. Fabian.
  # The same operations can be done in half the lines using s2dverification;
  # this operation could also be more general using CFU_season. Virginie.
  o1 <- apply(ano.o.sh[1,1,,fcavt,,], c(1,3,4), mean, na.rm=TRUE)
  dim(o1) <- c(1, 1, nyr, 1, nlat, nlon)
  f1 <- apply(ano.f.sh[1,,,fcavt,,], c(1,2,4,5), mean, na.rm=TRUE)
  dim(f1) <- c(1, nfc, nyr, 1, nlat, nlon)
  ## Cross-validated PCs. Fabian.
  # This should be extended to nmod and nlt by simple loops. Virginie.
  NAOF.ver <- array(NA, c(1, nfc, nyr))
  NAOO.ver <- array(NA, c(1, 1, nyr))
  for (iy in seq_len(nyr)) {
    ## Observed leading EOF excluding forecast start year iy. Fabian.
    # Without the rotation and only 1 EOF. Virginie.
    # OEOF <- CFU_EOF(c(lon[c((nlonr+1):nlon)]-360,lon[1:nlonr]),lat,o1[1,1,-iy,1,,],neofs=3)
    OEOF <- CFU_EOF(lon, lat, o1[1,1,-iy,1,,], neofs=1)
    ## Correct polarity of the pattern: flip EOF and PC signs together so the
    ## projection is unchanged up to sign. Fabian.
    # 'flip' instead of 'sign' to avoid shadowing base::sign().
    # NOTE(review): the index round(2*nlat/3) is applied to the LAST EOF
    # dimension — confirm it selects the intended latitude band given
    # CFU_EOF's output ordering.
    flip <- 1
    if (0 < mean(OEOF$EOFs[1,,round(2*nlat/3)], na.rm=TRUE)) {
      flip <- -1
    }
    OEOF$EOFs <- OEOF$EOFs * flip
    OEOF$PCs <- OEOF$PCs * flip
    # There should be an option that allows computing the model EOFs instead
    # of only projecting on the observed EOFs. Virginie.
    # for (ifc in 1:nfc) {
    #   FEOF <- CFU_EOF(lon,lat,f1[1,ifc,,1,,],neofs=1)
    #   NAOF.ver[1,ifc,]=FEOF$PCs
    # }
    ## Project forecast anomalies onto the observed EOF. Fabian.
    # Without the rotation. Virginie.
    #PCF <- CFU_MODE(c(lon[c((nlonr+1):nlon)]-360,lon[1:nlonr]),lat,f1,OEOF,mode=1)
    PCF <- CFU_MODE(lon, lat, f1, OEOF, mode=1)
    NAOF.ver[1,,iy] <- PCF$PC.ver[1,,iy,1]
    ## Project observed anomalies. Fabian.
    # Without the rotation. Virginie.
    #PCO <- CFU_MODE(c(lon[c((nlonr+1):nlon)]-360,lon[1:nlonr]),lat,o1,OEOF,mode=1)
    PCO <- CFU_MODE(lon, lat, o1, OEOF, mode=1)
    ## Keep PCs of excluded forecast start year. Fabian.
    NAOO.ver[1,1,iy] <- PCO$PC.ver[1,1,iy,1]
  }
  # Here there could be a rotation of the pattern. Virginie.
  # OEOF$EOFs[1,,] <- OEOF$EOFs[1,c((nlonr+1):nlon,1:nlonr),]
  # The problem with this rotation is that the user does not have the
  # corresponding longitudes anymore, so it is not rotated at all. Virginie.
  return(list(NAOF.ver=NAOF.ver, NAOO.ver=NAOO.ver, OEOF=OEOF))
}
|