#!/usr/bin/env Rscript # ## Poisson regression on 1 variable only # by Mathieu Boudreault and Louis-Philippe Caron # Input: 1 file with a series of predictors # 1 file with a series of predictands # 1 file with a series of trends (max 6) # Required format: - All files should be matrices, which each item separated from the next by "," # - The first column of all files has to be the same and must represent the time of measurements (unit is irrelevant) # - Missing values should be left blank # - Each column should be identified with text # Output: 1 file with the beta values - betas.csv # 1 file with the p-values - pval.csv # Optional : 1 file with p-values robust to autoregression and heteroskedasticity (Sandwich test) - pvalsand.csv - Is produced if a file name is given (i.e. if 6 arguments are passed when calling the function) # Format: All are matrices, [num(predictands) x num(predictors)] x num(trends) # # History: Created on 11/11/13 # ######################################################################### rm(list=ls(all=TRUE)) # Clear memory ## 1. Load the necessary packages library(zoo) library(sandwich) library(lmtest) args=commandArgs(TRUE) countfile=args[1] predictorfile=args[2] trendfile=args[3] #length(args) if ( length(args)==6 ) { betafile=args[4] pvaluefile=args[5] pvalsandfile=args[6] } else if ( length(args)==5 ) { betafile=args[4] pvaluefile=args[5] } else if ( length(args)==3 ) { #countfile="counts.csv" #predictorfile="predictors.csv" betafile="betas.csv" pvaluefile="pval.csv" pvalsandfile="pvalsand.csv" } else stop("Invalid number of input files !") path = "." ## 2. Load data and convert into R format Predictors <- data.frame(read.table(paste(path, predictorfile, sep = "/" ), sep=",", header=TRUE)) Trends <- data.frame(read.table(paste(path,trendfile, sep = "/"), sep=",", header=TRUE)) X <- merge(Predictors, Trends) N <- data.frame(read.table(paste(path,countfile, sep = "/" ), sep=",", header=TRUE)) ALL.data <- merge(N, X) ## 3. Additional variables nameP <- names(Predictors) nameP <- nameP[-1] #Remove first element nameT <- names(Trends) nameT <- nameT[-1] #Remove first element nameN <- names(N) nameN <- nameN[-1] #Remove first element # cte <- length(nameP) * length(nameN) * length(nameT) # total number of regressions TheRegressions <- as.list(numeric(cte)) TheOutput <- as.list(numeric(cte)) dim(TheRegressions) <- c(length(nameN), length(nameP), length(nameT)) dim(TheOutput) <- c(length(nameN), length(nameP), length(nameT)) rownames(TheRegressions) <- nameN colnames(TheRegressions) <- nameP rownames(TheOutput) <- nameN colnames(TheOutput) <- nameP ## 4. Compute the regressions for(ttt in 1:length(nameT)) { for(n in 1:length(nameN)) { for(x in 1:length(nameP)) { REGRESSORS <- paste(c(nameT[ttt], nameP[x]), collapse="+") formule <- as.formula(paste(c(nameN[n], REGRESSORS), collapse=" ~ ")) regression <- glm(formula = formule, data = ALL.data, family = poisson) TheRegressions[[n, x, ttt]] <- regression TheOutput[[n, x, ttt]] <- list(modele = regression$formula, resultats = summary(regression), sandwich.coef.test = coeftest(regression, vcov = sandwich) ) } } } # ## 5. Variable to store results somm.beta <- array(0, dim=c(length(nameN), length(nameP), length(nameT))) rownames(somm.beta) <- nameN colnames(somm.beta) <- nameP somm.pval <- array(0, dim=c(length(nameN), length(nameP), length(nameT))) rownames(somm.pval) <- nameN colnames(somm.pval) <- nameP somm.pval.sand <- array(0, dim=c(length(nameN), length(nameP), length(nameT))) rownames(somm.pval.sand) <- nameN colnames(somm.pval.sand) <- nameP for(ttt in 1:length(nameT)) { for(n in 1:length(nameN)) { for(x in 1:length(nameP)) { somm.beta[n,x,ttt] <- TheOutput[[n, x, ttt]]$resultats$coefficients[2,1] somm.pval[n,x,ttt] <- TheOutput[[n, x, ttt]]$resultats$coefficients[2,4] somm.pval.sand[n,x,ttt] <- TheOutput[[n, x, ttt]]$sandwich.coef.test[2,4] } } } # # # Write the data if ( length(nameT) == 1) { write.csv(rbind(somm.beta[,,1]), paste(path, betafile,sep = "/")) write.csv(rbind(somm.pval[,,1]), paste(path, pvaluefile,sep = "/")) if ( length(args)==6 ) { write.csv(rbind(somm.pval.sand[,,1]), paste(path, pvalsandfile,sep = "/")) } } else if( length(nameT) == 2) { write.csv(rbind(somm.beta[,,1],somm.beta[,,2]), paste(path, betafile,sep = "/")) write.csv(rbind(somm.pval[,,1],somm.pval[,,2]), paste(path, pvaluefile,sep = "/")) if ( length(args)==6 ) { write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2]), paste(path, pvalsandfile,sep = "/")) } } else if( length(nameT) == 3) { write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3]), paste(path, betafile,sep = "/")) write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3]), paste(path, pvaluefile,sep = "/")) if ( length(args)==6 ) { write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3]), paste(path, pvalsandfile,sep = "/")) } } else if( length(nameT) == 4) { write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4]), paste(path, betafile,sep = "/")) write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4]), paste(path, pvaluefile,sep = "/")) if ( length(args)==6 ) { write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4]), paste(path, pvalsandfile,sep = "/")) } } else if ( length(nameT) == 5) { write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4],somm.beta[,,5]), paste(path, betafile,sep = "/")) write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4],somm.pval[,,5]), paste(path, pvaluefile,sep = "/")) if ( length(args)==6 ) { write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4],somm.pval.sand[,,5]), paste(path, pvalsandfile,sep = "/")) } } else if( length(nameT) == 6) { write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4],somm.beta[,,5],somm.beta[,,6]), paste(path, betafile,sep = "/")) write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4],somm.pval[,,5],somm.pval[,,6]), paste(path, pvaluefile,sep = "/")) if ( length(args)==6 ) { write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4],somm.pval.sand[,,5],somm.pval.sand[,,6]), paste(path, pvalsandfile,sep = "/")) } } else stop("Too many trends!") rm(list=ls()) quit()