#!/usr/bin/env Rscript # # Poisson regression on 1 variable only # by Mathieu Boudreault and Louis-Philippe Caron # Input: 1 file with a series of predictors # 1 file with a series of predictands # Required format: - Both files should be matrices, which each item separated from the next by "," # - The first column of both files has to be the same and must represent the time of measurements (unit is irrelevant) # - Missing values should be left blank # - Each column should be identified with text # Output: 1 file with the beta values - betas.csv # 1 file with the p-values - pval.csv # Optional : 1 file with p-values robust to autoregression and heteroskedasticity (Sandwich test) - pvalsand.csv - Is returned with file name of the 5th argument # Format: All are matrices, num(predictands) x num(predictors) # # History: Created on 11/11/13 # ######################################################################### rm(list=ls(all=TRUE)) # Clear memory ## 1. Load the necessary packages library(zoo) library(sandwich) library(lmtest) args=commandArgs(TRUE) countfile=args[1] predictorfile=args[2] #length(args) if ( length(args)==5 ) { betafile=args[3] pvaluefile=args[4] pvalsandfile=args[5] } else if ( length(args)==4 ) { betafile=args[3] pvaluefile=args[4] } else if ( length(args)==2 ) { #countfile="counts.csv" #predictorfile="predictors.csv" betafile="betas.csv" pvaluefile="pval.csv" pvalsandfile="pvalsand.csv" } else stop("Invalid number of input files !") path = "." ## 2. Load data and convert into R format X <- data.frame(read.table(paste(path, predictorfile, sep = "/"), sep=",", header=TRUE)) N <- data.frame(read.table(paste(path, countfile, sep = "/"), sep=",", header=TRUE)) ALL.data <- merge(N, X) ## 3. Additional variables nameX <- names(X) nameN <- names(N) nameX <- nameX[-1] #Remove first element nameN <- nameN[-1] #Remove first element # cte <- length(nameX) * length(nameN) # total number of regressions TheRegressions <- as.list(numeric(cte)) TheOutput <- as.list(numeric(cte)) dim(TheRegressions) <- c(length(nameN), length(nameX)) dim(TheOutput) <- c(length(nameN), length(nameX)) rownames(TheRegressions) <- nameN colnames(TheRegressions) <- nameX rownames(TheOutput) <- nameN colnames(TheOutput) <- nameX ## 4. Compute the Poisson regressions for(n in 1:length(nameN)) { for(x in 1:length(nameX)) { formule <- as.formula(paste(c(nameN[n], nameX[x]), collapse=" ~ ")) regression <- glm(formula = formule, data = ALL.data, family = poisson) TheRegressions[[n, x]] <- regression #store the entire object TheOutput[[n, x]] <- list(modele = regression$formula, resultats = summary(regression), sandwich.coef.test = coeftest(regression, vcov = sandwich) ) } } # ## 5. Variable to store results somm.beta <- matrix(0, nrow = length(nameN), ncol = length(nameX)) rownames(somm.beta) <- nameN colnames(somm.beta) <- nameX # somm.pval <- matrix(0, nrow = length(nameN), ncol = length(nameX)) rownames(somm.pval) <- nameN colnames(somm.pval) <- nameX somm.pval.sand <- matrix(0, nrow = length(nameN), ncol = length(nameX)) rownames(somm.pval.sand) <- nameN colnames(somm.pval.sand) <- nameX # # for(n in 1:length(nameN)) { for(x in 1:length(nameX)) { somm.beta[n,x] <- TheOutput[[n, x]]$resultats$coefficients[2,1] somm.pval[n,x] <- TheOutput[[n, x]]$resultats$coefficients[2,4] somm.pval.sand[n,x] <- TheOutput[[n, x]]$sandwich.coef.test[2,4] } } # # Write the data write.csv(somm.beta, paste(path,betafile,sep = "/")) # The betas write.csv(somm.pval, paste(path, pvaluefile,sep = "/")) # The p-values if ( length(args)==5 ) { write.csv(somm.pval.sand, paste(path, pvalsandfile,sep = "/")) # The Sandwich p-values } rm(list=ls()) quit()