#!/usr/bin/env Rscript 
#
# Poisson regression on 1 variable only
# by Mathieu Boudreault and Louis-Philippe Caron
# Input: 1 file with a series of predictors
#        1 file with a series of predictands
# Required format:  - Both files should be matrices, which each item separated from the next by ","
#                   - The first column of both files has to be the same and must represent the time of measurements (unit is irrelevant)
#                   - Missing values should be left blank
#                   - Each column should be identified with text
# Output: 1 file with the beta values - betas.csv
#         1 file with the p-values - pval.csv
#         Optional : 1 file with p-values robust to autoregression and heteroskedasticity (Sandwich test) - pvalsand.csv - Is returned with file name of the 5th argument 
#         Format: All are matrices, num(predictands) x num(predictors)
#
# History: Created on 11/11/13
#
#########################################################################

rm(list=ls(all=TRUE))  # Clear memory
##  1. Load the necessary packages
library(zoo)
library(sandwich)
library(lmtest) 

args=commandArgs(TRUE)
countfile=args[1]
predictorfile=args[2]

#length(args)

if ( length(args)==5  ) {
betafile=args[3]
pvaluefile=args[4]
pvalsandfile=args[5] 
} else if ( length(args)==4  ) {
betafile=args[3]
pvaluefile=args[4]
} else if ( length(args)==2  ) {
#countfile="counts.csv"
#predictorfile="predictors.csv"
betafile="betas.csv"
pvaluefile="pval.csv"
pvalsandfile="pvalsand.csv" 
} else 
    stop("Invalid number of input files !") 


path = "."

##  2. Load data and convert into R format
X <- data.frame(read.table(paste(path, predictorfile, sep = "/"), sep=",", header=TRUE))
N <- data.frame(read.table(paste(path, countfile, sep = "/"), sep=",", header=TRUE))
ALL.data <- merge(N, X)

## 3. Additional variables
nameX <- names(X)
nameN <- names(N)
nameX <- nameX[-1] #Remove first element
nameN <- nameN[-1] #Remove first element
#
cte <- length(nameX) * length(nameN) # total number of regressions
TheRegressions <- as.list(numeric(cte))
TheOutput <- as.list(numeric(cte))
dim(TheRegressions) <- c(length(nameN), length(nameX))
dim(TheOutput) <- c(length(nameN), length(nameX))

rownames(TheRegressions) <- nameN
colnames(TheRegressions) <- nameX
rownames(TheOutput) <- nameN
colnames(TheOutput) <- nameX

## 4. Compute the Poisson regressions
for(n in 1:length(nameN))
{
  for(x in 1:length(nameX))
  {
    formule <- as.formula(paste(c(nameN[n], nameX[x]), collapse=" ~ "))
    regression <- glm(formula = formule, data = ALL.data, family = poisson)
  
    TheRegressions[[n, x]] <- regression   #store the entire object
    TheOutput[[n, x]] <- list(modele = regression$formula,
                              resultats = summary(regression),
                              sandwich.coef.test = coeftest(regression, vcov = sandwich)
                              )
  }
}
#
## 5. Variable to store results 
 somm.beta <- matrix(0, nrow = length(nameN), ncol = length(nameX))
 rownames(somm.beta) <- nameN
 colnames(somm.beta) <- nameX
# 
 somm.pval <- matrix(0, nrow = length(nameN), ncol = length(nameX))
 rownames(somm.pval) <- nameN
 colnames(somm.pval) <- nameX
 
 somm.pval.sand <- matrix(0, nrow = length(nameN), ncol = length(nameX))
 rownames(somm.pval.sand) <- nameN
 colnames(somm.pval.sand) <- nameX
# 
# 
 for(n in 1:length(nameN))
 {
   for(x in 1:length(nameX))
   {
     somm.beta[n,x] <- TheOutput[[n, x]]$resultats$coefficients[2,1]
     somm.pval[n,x] <- TheOutput[[n, x]]$resultats$coefficients[2,4]
     somm.pval.sand[n,x] <- TheOutput[[n, x]]$sandwich.coef.test[2,4]
   }
 }
# 
# Write the data 
write.csv(somm.beta, paste(path,betafile,sep = "/")) # The betas
write.csv(somm.pval, paste(path, pvaluefile,sep = "/")) # The p-values
if ( length(args)==5  ) {
write.csv(somm.pval.sand, paste(path, pvalsandfile,sep = "/")) # The Sandwich p-values 
}
rm(list=ls())
quit()