fmasson
/
forcing-perturbation


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
							#!/usr/bin/env Rscript 
#
## Poisson regression on 1 variable only
# by Mathieu Boudreault and Louis-Philippe Caron
# Input: 1 file with a series of predictors
#        1 file with a series of predictands
#        1 file with a series of trends (max 6)
# Required format:  - All files should be matrices, which each item separated from the next by ","
#                   - The first column of all files has to be the same and must represent the time of measurements (unit is irrelevant)
#                   - Missing values should be left blank
#                   - Each column should be identified with text
# Output: 1 file with the beta values - betas.csv
#         1 file with the p-values - pval.csv
#         Optional : 1 file with p-values robust to autoregression and heteroskedasticity (Sandwich test) - pvalsand.csv  - Is produced if a file name is given (i.e. if 6 arguments are passed when calling the function)  
#         Format: All are matrices, [num(predictands) x num(predictors)] x num(trends)
#
# History: Created on 11/11/13
#
#########################################################################

rm(list=ls(all=TRUE))  # Clear memory
##  1. Load the necessary packages
library(zoo)
library(sandwich)
library(lmtest) 

args=commandArgs(TRUE)
countfile=args[1]
predictorfile=args[2]
trendfile=args[3]
#length(args)

if ( length(args)==6  ) {
betafile=args[4]
pvaluefile=args[5]
pvalsandfile=args[6] 
} else if ( length(args)==5  ) {
betafile=args[4]
pvaluefile=args[5]
} else if ( length(args)==3  ) {
#countfile="counts.csv"
#predictorfile="predictors.csv"
betafile="betas.csv"
pvaluefile="pval.csv"
pvalsandfile="pvalsand.csv" 
} else 
    stop("Invalid number of input files !") 


path = "."

##  2. Load data and convert into R format
Predictors <- data.frame(read.table(paste(path, predictorfile, sep = "/" ), sep=",", header=TRUE))
Trends <- data.frame(read.table(paste(path,trendfile, sep = "/"), sep=",", header=TRUE))

X <- merge(Predictors, Trends)

N <- data.frame(read.table(paste(path,countfile, sep = "/" ), sep=",", header=TRUE))

ALL.data <- merge(N, X)


## 3. Additional variables
nameP <- names(Predictors)
nameP <- nameP[-1] #Remove first element
nameT <- names(Trends)
nameT <- nameT[-1] #Remove first element
nameN <- names(N)
nameN <- nameN[-1] #Remove first element
#
cte <- length(nameP) * length(nameN) * length(nameT)  # total number of regressions

TheRegressions <- as.list(numeric(cte))
TheOutput <- as.list(numeric(cte))

dim(TheRegressions) <- c(length(nameN), length(nameP), length(nameT))
dim(TheOutput) <- c(length(nameN), length(nameP), length(nameT))

rownames(TheRegressions) <- nameN
colnames(TheRegressions) <- nameP 
rownames(TheOutput) <- nameN
colnames(TheOutput) <- nameP

## 4. Compute the regressions

for(ttt in 1:length(nameT))
{
    for(n in 1:length(nameN))
    {
        for(x in 1:length(nameP))
        {
          REGRESSORS <- paste(c(nameT[ttt], nameP[x]), collapse="+")
          formule <- as.formula(paste(c(nameN[n], REGRESSORS), collapse=" ~ "))
          regression <- glm(formula = formule, data = ALL.data, family = poisson)
          
          TheRegressions[[n, x, ttt]] <- regression
          TheOutput[[n, x, ttt]] <- list(modele = regression$formula,
                                    resultats = summary(regression),
                                    sandwich.coef.test = coeftest(regression, vcov = sandwich)
                                    )
      }
  }
}
#
## 5. Variable to store results 
 somm.beta <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
 rownames(somm.beta) <- nameN
 colnames(somm.beta) <- nameP
 
 somm.pval <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
 rownames(somm.pval) <- nameN
 colnames(somm.pval) <- nameP
 
 somm.pval.sand <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
 rownames(somm.pval.sand) <- nameN
 colnames(somm.pval.sand) <- nameP
 
 for(ttt in 1:length(nameT))
 {
   for(n in 1:length(nameN))
   {
     for(x in 1:length(nameP))
     {
       somm.beta[n,x,ttt] <- TheOutput[[n, x, ttt]]$resultats$coefficients[2,1]
       somm.pval[n,x,ttt] <- TheOutput[[n, x, ttt]]$resultats$coefficients[2,4]
       somm.pval.sand[n,x,ttt] <- TheOutput[[n, x, ttt]]$sandwich.coef.test[2,4]
     }
   }
 }
#
# 
# Write the data 
if ( length(nameT) == 1) {
  write.csv(rbind(somm.beta[,,1]), paste(path, betafile,sep = "/"))
  write.csv(rbind(somm.pval[,,1]), paste(path, pvaluefile,sep = "/"))
  if ( length(args)==6  ) {
    write.csv(rbind(somm.pval.sand[,,1]), paste(path, pvalsandfile,sep = "/"))
  }
} else if( length(nameT) == 2) {
  write.csv(rbind(somm.beta[,,1],somm.beta[,,2]), paste(path, betafile,sep = "/"))
  write.csv(rbind(somm.pval[,,1],somm.pval[,,2]), paste(path, pvaluefile,sep = "/"))
  if ( length(args)==6  ) {
    write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2]), paste(path, pvalsandfile,sep = "/"))
  }
} else if( length(nameT) == 3) {
  write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3]), paste(path, betafile,sep = "/"))
  write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3]), paste(path, pvaluefile,sep = "/"))
  if ( length(args)==6  ) {
    write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3]), paste(path, pvalsandfile,sep = "/"))
  }
} else if( length(nameT) == 4) {
  write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4]), paste(path, betafile,sep = "/"))
  write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4]), paste(path, pvaluefile,sep = "/"))
  if ( length(args)==6  ) {
    write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4]), paste(path, pvalsandfile,sep = "/"))
  }
} else if ( length(nameT) == 5) {
  write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4],somm.beta[,,5]), paste(path, betafile,sep = "/"))
  write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4],somm.pval[,,5]), paste(path, pvaluefile,sep = "/"))
  if ( length(args)==6  ) {
    write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4],somm.pval.sand[,,5]), paste(path, pvalsandfile,sep = "/"))
  }
} else if( length(nameT) == 6) {
  write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4],somm.beta[,,5],somm.beta[,,6]), paste(path, betafile,sep = "/"))
  write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4],somm.pval[,,5],somm.pval[,,6]), paste(path, pvaluefile,sep = "/"))
  if ( length(args)==6  ) {
    write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4],somm.pval.sand[,,5],somm.pval.sand[,,6]), paste(path, pvalsandfile,sep = "/"))
  }
} else 
   stop("Too many trends!")

rm(list=ls())
quit()