123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- #!/usr/bin/env Rscript
- #
- # Poisson regression on 1 variable only
- # by Mathieu Boudreault and Louis-Philippe Caron
- # Input: 1 file with a series of predictors
- # 1 file with a series of predictands
- # Required format: - Both files should be matrices, which each item separated from the next by ","
- # - The first column of both files has to be the same and must represent the time of measurements (unit is irrelevant)
- # - Missing values should be left blank
- # - Each column should be identified with text
- # Output: 1 file with the beta values - betas.csv
- # 1 file with the p-values - pval.csv
- # Optional : 1 file with p-values robust to autoregression and heteroskedasticity (Sandwich test) - pvalsand.csv - Is returned with file name of the 5th argument
- # Format: All are matrices, num(predictands) x num(predictors)
- #
- # History: Created on 11/11/13
- #
- #########################################################################
- rm(list=ls(all=TRUE)) # Clear memory
- ## 1. Load the necessary packages
- library(zoo)
- library(sandwich)
- library(lmtest)
- args=commandArgs(TRUE)
- countfile=args[1]
- predictorfile=args[2]
- #length(args)
- if ( length(args)==5 ) {
- betafile=args[3]
- pvaluefile=args[4]
- pvalsandfile=args[5]
- } else if ( length(args)==4 ) {
- betafile=args[3]
- pvaluefile=args[4]
- } else if ( length(args)==2 ) {
- #countfile="counts.csv"
- #predictorfile="predictors.csv"
- betafile="betas.csv"
- pvaluefile="pval.csv"
- pvalsandfile="pvalsand.csv"
- } else
- stop("Invalid number of input files !")
- path = "."
- ## 2. Load data and convert into R format
- X <- data.frame(read.table(paste(path, predictorfile, sep = "/"), sep=",", header=TRUE))
- N <- data.frame(read.table(paste(path, countfile, sep = "/"), sep=",", header=TRUE))
- ALL.data <- merge(N, X)
- ## 3. Additional variables
- nameX <- names(X)
- nameN <- names(N)
- nameX <- nameX[-1] #Remove first element
- nameN <- nameN[-1] #Remove first element
- #
- cte <- length(nameX) * length(nameN) # total number of regressions
- TheRegressions <- as.list(numeric(cte))
- TheOutput <- as.list(numeric(cte))
- dim(TheRegressions) <- c(length(nameN), length(nameX))
- dim(TheOutput) <- c(length(nameN), length(nameX))
- rownames(TheRegressions) <- nameN
- colnames(TheRegressions) <- nameX
- rownames(TheOutput) <- nameN
- colnames(TheOutput) <- nameX
- ## 4. Compute the Poisson regressions
- for(n in 1:length(nameN))
- {
- for(x in 1:length(nameX))
- {
- formule <- as.formula(paste(c(nameN[n], nameX[x]), collapse=" ~ "))
- regression <- glm(formula = formule, data = ALL.data, family = poisson)
-
- TheRegressions[[n, x]] <- regression #store the entire object
- TheOutput[[n, x]] <- list(modele = regression$formula,
- resultats = summary(regression),
- sandwich.coef.test = coeftest(regression, vcov = sandwich)
- )
- }
- }
- #
- ## 5. Variable to store results
- somm.beta <- matrix(0, nrow = length(nameN), ncol = length(nameX))
- rownames(somm.beta) <- nameN
- colnames(somm.beta) <- nameX
- #
- somm.pval <- matrix(0, nrow = length(nameN), ncol = length(nameX))
- rownames(somm.pval) <- nameN
- colnames(somm.pval) <- nameX
-
- somm.pval.sand <- matrix(0, nrow = length(nameN), ncol = length(nameX))
- rownames(somm.pval.sand) <- nameN
- colnames(somm.pval.sand) <- nameX
- #
- #
- for(n in 1:length(nameN))
- {
- for(x in 1:length(nameX))
- {
- somm.beta[n,x] <- TheOutput[[n, x]]$resultats$coefficients[2,1]
- somm.pval[n,x] <- TheOutput[[n, x]]$resultats$coefficients[2,4]
- somm.pval.sand[n,x] <- TheOutput[[n, x]]$sandwich.coef.test[2,4]
- }
- }
- #
- # Write the data
- write.csv(somm.beta, paste(path,betafile,sep = "/")) # The betas
- write.csv(somm.pval, paste(path, pvaluefile,sep = "/")) # The p-values
- if ( length(args)==5 ) {
- write.csv(somm.pval.sand, paste(path, pvalsandfile,sep = "/")) # The Sandwich p-values
- }
- rm(list=ls())
- quit()
|