PoissonReg.R 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. #!/usr/bin/env Rscript
  2. #
  3. # Poisson regression on 1 variable only
  4. # by Mathieu Boudreault and Louis-Philippe Caron
  5. # Input: 1 file with a series of predictors
  6. # 1 file with a series of predictands
  7. # Required format: - Both files should be matrices, which each item separated from the next by ","
  8. # - The first column of both files has to be the same and must represent the time of measurements (unit is irrelevant)
  9. # - Missing values should be left blank
  10. # - Each column should be identified with text
  11. # Output: 1 file with the beta values - betas.csv
  12. # 1 file with the p-values - pval.csv
  13. # Optional : 1 file with p-values robust to autoregression and heteroskedasticity (Sandwich test) - pvalsand.csv - Is returned with file name of the 5th argument
  14. # Format: All are matrices, num(predictands) x num(predictors)
  15. #
  16. # History: Created on 11/11/13
  17. #
  18. #########################################################################
  19. rm(list=ls(all=TRUE)) # Clear memory
  20. ## 1. Load the necessary packages
  21. library(zoo)
  22. library(sandwich)
  23. library(lmtest)
  24. args=commandArgs(TRUE)
  25. countfile=args[1]
  26. predictorfile=args[2]
  27. #length(args)
  28. if ( length(args)==5 ) {
  29. betafile=args[3]
  30. pvaluefile=args[4]
  31. pvalsandfile=args[5]
  32. } else if ( length(args)==4 ) {
  33. betafile=args[3]
  34. pvaluefile=args[4]
  35. } else if ( length(args)==2 ) {
  36. #countfile="counts.csv"
  37. #predictorfile="predictors.csv"
  38. betafile="betas.csv"
  39. pvaluefile="pval.csv"
  40. pvalsandfile="pvalsand.csv"
  41. } else
  42. stop("Invalid number of input files !")
  43. path = "."
  44. ## 2. Load data and convert into R format
  45. X <- data.frame(read.table(paste(path, predictorfile, sep = "/"), sep=",", header=TRUE))
  46. N <- data.frame(read.table(paste(path, countfile, sep = "/"), sep=",", header=TRUE))
  47. ALL.data <- merge(N, X)
  48. ## 3. Additional variables
  49. nameX <- names(X)
  50. nameN <- names(N)
  51. nameX <- nameX[-1] #Remove first element
  52. nameN <- nameN[-1] #Remove first element
  53. #
  54. cte <- length(nameX) * length(nameN) # total number of regressions
  55. TheRegressions <- as.list(numeric(cte))
  56. TheOutput <- as.list(numeric(cte))
  57. dim(TheRegressions) <- c(length(nameN), length(nameX))
  58. dim(TheOutput) <- c(length(nameN), length(nameX))
  59. rownames(TheRegressions) <- nameN
  60. colnames(TheRegressions) <- nameX
  61. rownames(TheOutput) <- nameN
  62. colnames(TheOutput) <- nameX
  63. ## 4. Compute the Poisson regressions
  64. for(n in 1:length(nameN))
  65. {
  66. for(x in 1:length(nameX))
  67. {
  68. formule <- as.formula(paste(c(nameN[n], nameX[x]), collapse=" ~ "))
  69. regression <- glm(formula = formule, data = ALL.data, family = poisson)
  70. TheRegressions[[n, x]] <- regression #store the entire object
  71. TheOutput[[n, x]] <- list(modele = regression$formula,
  72. resultats = summary(regression),
  73. sandwich.coef.test = coeftest(regression, vcov = sandwich)
  74. )
  75. }
  76. }
  77. #
  78. ## 5. Variable to store results
  79. somm.beta <- matrix(0, nrow = length(nameN), ncol = length(nameX))
  80. rownames(somm.beta) <- nameN
  81. colnames(somm.beta) <- nameX
  82. #
  83. somm.pval <- matrix(0, nrow = length(nameN), ncol = length(nameX))
  84. rownames(somm.pval) <- nameN
  85. colnames(somm.pval) <- nameX
  86. somm.pval.sand <- matrix(0, nrow = length(nameN), ncol = length(nameX))
  87. rownames(somm.pval.sand) <- nameN
  88. colnames(somm.pval.sand) <- nameX
  89. #
  90. #
  91. for(n in 1:length(nameN))
  92. {
  93. for(x in 1:length(nameX))
  94. {
  95. somm.beta[n,x] <- TheOutput[[n, x]]$resultats$coefficients[2,1]
  96. somm.pval[n,x] <- TheOutput[[n, x]]$resultats$coefficients[2,4]
  97. somm.pval.sand[n,x] <- TheOutput[[n, x]]$sandwich.coef.test[2,4]
  98. }
  99. }
  100. #
  101. # Write the data
  102. write.csv(somm.beta, paste(path,betafile,sep = "/")) # The betas
  103. write.csv(somm.pval, paste(path, pvaluefile,sep = "/")) # The p-values
  104. if ( length(args)==5 ) {
  105. write.csv(somm.pval.sand, paste(path, pvalsandfile,sep = "/")) # The Sandwich p-values
  106. }
  107. rm(list=ls())
  108. quit()