123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- #!/usr/bin/env Rscript
- #
- ## Poisson regression on 1 variable only
- # by Mathieu Boudreault and Louis-Philippe Caron
- # Input: 1 file with a series of predictors
- # 1 file with a series of predictands
- # 1 file with a series of trends (max 6)
- # Required format: - All files should be matrices, which each item separated from the next by ","
- # - The first column of all files has to be the same and must represent the time of measurements (unit is irrelevant)
- # - Missing values should be left blank
- # - Each column should be identified with text
- # Output: 1 file with the beta values - betas.csv
- # 1 file with the p-values - pval.csv
- # Optional : 1 file with p-values robust to autoregression and heteroskedasticity (Sandwich test) - pvalsand.csv - Is produced if a file name is given (i.e. if 6 arguments are passed when calling the function)
- # Format: All are matrices, [num(predictands) x num(predictors)] x num(trends)
- #
- # History: Created on 11/11/13
- #
- #########################################################################
- rm(list=ls(all=TRUE)) # Clear memory
- ## 1. Load the necessary packages
- library(zoo)
- library(sandwich)
- library(lmtest)
- args=commandArgs(TRUE)
- countfile=args[1]
- predictorfile=args[2]
- trendfile=args[3]
- #length(args)
- if ( length(args)==6 ) {
- betafile=args[4]
- pvaluefile=args[5]
- pvalsandfile=args[6]
- } else if ( length(args)==5 ) {
- betafile=args[4]
- pvaluefile=args[5]
- } else if ( length(args)==3 ) {
- #countfile="counts.csv"
- #predictorfile="predictors.csv"
- betafile="betas.csv"
- pvaluefile="pval.csv"
- pvalsandfile="pvalsand.csv"
- } else
- stop("Invalid number of input files !")
- path = "."
- ## 2. Load data and convert into R format
- Predictors <- data.frame(read.table(paste(path, predictorfile, sep = "/" ), sep=",", header=TRUE))
- Trends <- data.frame(read.table(paste(path,trendfile, sep = "/"), sep=",", header=TRUE))
- X <- merge(Predictors, Trends)
- N <- data.frame(read.table(paste(path,countfile, sep = "/" ), sep=",", header=TRUE))
- ALL.data <- merge(N, X)
- ## 3. Additional variables
- nameP <- names(Predictors)
- nameP <- nameP[-1] #Remove first element
- nameT <- names(Trends)
- nameT <- nameT[-1] #Remove first element
- nameN <- names(N)
- nameN <- nameN[-1] #Remove first element
- #
- cte <- length(nameP) * length(nameN) * length(nameT) # total number of regressions
- TheRegressions <- as.list(numeric(cte))
- TheOutput <- as.list(numeric(cte))
- dim(TheRegressions) <- c(length(nameN), length(nameP), length(nameT))
- dim(TheOutput) <- c(length(nameN), length(nameP), length(nameT))
- rownames(TheRegressions) <- nameN
- colnames(TheRegressions) <- nameP
- rownames(TheOutput) <- nameN
- colnames(TheOutput) <- nameP
- ## 4. Compute the regressions
- for(ttt in 1:length(nameT))
- {
- for(n in 1:length(nameN))
- {
- for(x in 1:length(nameP))
- {
- REGRESSORS <- paste(c(nameT[ttt], nameP[x]), collapse="+")
- formule <- as.formula(paste(c(nameN[n], REGRESSORS), collapse=" ~ "))
- regression <- glm(formula = formule, data = ALL.data, family = poisson)
-
- TheRegressions[[n, x, ttt]] <- regression
- TheOutput[[n, x, ttt]] <- list(modele = regression$formula,
- resultats = summary(regression),
- sandwich.coef.test = coeftest(regression, vcov = sandwich)
- )
- }
- }
- }
- #
- ## 5. Variable to store results
- somm.beta <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
- rownames(somm.beta) <- nameN
- colnames(somm.beta) <- nameP
-
- somm.pval <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
- rownames(somm.pval) <- nameN
- colnames(somm.pval) <- nameP
-
- somm.pval.sand <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
- rownames(somm.pval.sand) <- nameN
- colnames(somm.pval.sand) <- nameP
-
- for(ttt in 1:length(nameT))
- {
- for(n in 1:length(nameN))
- {
- for(x in 1:length(nameP))
- {
- somm.beta[n,x,ttt] <- TheOutput[[n, x, ttt]]$resultats$coefficients[2,1]
- somm.pval[n,x,ttt] <- TheOutput[[n, x, ttt]]$resultats$coefficients[2,4]
- somm.pval.sand[n,x,ttt] <- TheOutput[[n, x, ttt]]$sandwich.coef.test[2,4]
- }
- }
- }
- #
- #
- # Write the data
- if ( length(nameT) == 1) {
- write.csv(rbind(somm.beta[,,1]), paste(path, betafile,sep = "/"))
- write.csv(rbind(somm.pval[,,1]), paste(path, pvaluefile,sep = "/"))
- if ( length(args)==6 ) {
- write.csv(rbind(somm.pval.sand[,,1]), paste(path, pvalsandfile,sep = "/"))
- }
- } else if( length(nameT) == 2) {
- write.csv(rbind(somm.beta[,,1],somm.beta[,,2]), paste(path, betafile,sep = "/"))
- write.csv(rbind(somm.pval[,,1],somm.pval[,,2]), paste(path, pvaluefile,sep = "/"))
- if ( length(args)==6 ) {
- write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2]), paste(path, pvalsandfile,sep = "/"))
- }
- } else if( length(nameT) == 3) {
- write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3]), paste(path, betafile,sep = "/"))
- write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3]), paste(path, pvaluefile,sep = "/"))
- if ( length(args)==6 ) {
- write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3]), paste(path, pvalsandfile,sep = "/"))
- }
- } else if( length(nameT) == 4) {
- write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4]), paste(path, betafile,sep = "/"))
- write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4]), paste(path, pvaluefile,sep = "/"))
- if ( length(args)==6 ) {
- write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4]), paste(path, pvalsandfile,sep = "/"))
- }
- } else if ( length(nameT) == 5) {
- write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4],somm.beta[,,5]), paste(path, betafile,sep = "/"))
- write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4],somm.pval[,,5]), paste(path, pvaluefile,sep = "/"))
- if ( length(args)==6 ) {
- write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4],somm.pval.sand[,,5]), paste(path, pvalsandfile,sep = "/"))
- }
- } else if( length(nameT) == 6) {
- write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4],somm.beta[,,5],somm.beta[,,6]), paste(path, betafile,sep = "/"))
- write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4],somm.pval[,,5],somm.pval[,,6]), paste(path, pvaluefile,sep = "/"))
- if ( length(args)==6 ) {
- write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4],somm.pval.sand[,,5],somm.pval.sand[,,6]), paste(path, pvalsandfile,sep = "/"))
- }
- } else
- stop("Too many trends!")
- rm(list=ls())
- quit()
|