PoissonRegwTrend.R 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. #!/usr/bin/env Rscript
  2. #
  3. ## Poisson regression on 1 variable only
  4. # by Mathieu Boudreault and Louis-Philippe Caron
  5. # Input: 1 file with a series of predictors
  6. # 1 file with a series of predictands
  7. # 1 file with a series of trends (max 6)
  8. # Required format: - All files should be matrices, which each item separated from the next by ","
  9. # - The first column of all files has to be the same and must represent the time of measurements (unit is irrelevant)
  10. # - Missing values should be left blank
  11. # - Each column should be identified with text
  12. # Output: 1 file with the beta values - betas.csv
  13. # 1 file with the p-values - pval.csv
  14. # Optional : 1 file with p-values robust to autoregression and heteroskedasticity (Sandwich test) - pvalsand.csv - Is produced if a file name is given (i.e. if 6 arguments are passed when calling the function)
  15. # Format: All are matrices, [num(predictands) x num(predictors)] x num(trends)
  16. #
  17. # History: Created on 11/11/13
  18. #
  19. #########################################################################
  20. rm(list=ls(all=TRUE)) # Clear memory
  21. ## 1. Load the necessary packages
  22. library(zoo)
  23. library(sandwich)
  24. library(lmtest)
  25. args=commandArgs(TRUE)
  26. countfile=args[1]
  27. predictorfile=args[2]
  28. trendfile=args[3]
  29. #length(args)
  30. if ( length(args)==6 ) {
  31. betafile=args[4]
  32. pvaluefile=args[5]
  33. pvalsandfile=args[6]
  34. } else if ( length(args)==5 ) {
  35. betafile=args[4]
  36. pvaluefile=args[5]
  37. } else if ( length(args)==3 ) {
  38. #countfile="counts.csv"
  39. #predictorfile="predictors.csv"
  40. betafile="betas.csv"
  41. pvaluefile="pval.csv"
  42. pvalsandfile="pvalsand.csv"
  43. } else
  44. stop("Invalid number of input files !")
  45. path = "."
  46. ## 2. Load data and convert into R format
  47. Predictors <- data.frame(read.table(paste(path, predictorfile, sep = "/" ), sep=",", header=TRUE))
  48. Trends <- data.frame(read.table(paste(path,trendfile, sep = "/"), sep=",", header=TRUE))
  49. X <- merge(Predictors, Trends)
  50. N <- data.frame(read.table(paste(path,countfile, sep = "/" ), sep=",", header=TRUE))
  51. ALL.data <- merge(N, X)
  52. ## 3. Additional variables
  53. nameP <- names(Predictors)
  54. nameP <- nameP[-1] #Remove first element
  55. nameT <- names(Trends)
  56. nameT <- nameT[-1] #Remove first element
  57. nameN <- names(N)
  58. nameN <- nameN[-1] #Remove first element
  59. #
  60. cte <- length(nameP) * length(nameN) * length(nameT) # total number of regressions
  61. TheRegressions <- as.list(numeric(cte))
  62. TheOutput <- as.list(numeric(cte))
  63. dim(TheRegressions) <- c(length(nameN), length(nameP), length(nameT))
  64. dim(TheOutput) <- c(length(nameN), length(nameP), length(nameT))
  65. rownames(TheRegressions) <- nameN
  66. colnames(TheRegressions) <- nameP
  67. rownames(TheOutput) <- nameN
  68. colnames(TheOutput) <- nameP
  69. ## 4. Compute the regressions
  70. for(ttt in 1:length(nameT))
  71. {
  72. for(n in 1:length(nameN))
  73. {
  74. for(x in 1:length(nameP))
  75. {
  76. REGRESSORS <- paste(c(nameT[ttt], nameP[x]), collapse="+")
  77. formule <- as.formula(paste(c(nameN[n], REGRESSORS), collapse=" ~ "))
  78. regression <- glm(formula = formule, data = ALL.data, family = poisson)
  79. TheRegressions[[n, x, ttt]] <- regression
  80. TheOutput[[n, x, ttt]] <- list(modele = regression$formula,
  81. resultats = summary(regression),
  82. sandwich.coef.test = coeftest(regression, vcov = sandwich)
  83. )
  84. }
  85. }
  86. }
  87. #
  88. ## 5. Variable to store results
  89. somm.beta <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
  90. rownames(somm.beta) <- nameN
  91. colnames(somm.beta) <- nameP
  92. somm.pval <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
  93. rownames(somm.pval) <- nameN
  94. colnames(somm.pval) <- nameP
  95. somm.pval.sand <- array(0, dim=c(length(nameN), length(nameP), length(nameT)))
  96. rownames(somm.pval.sand) <- nameN
  97. colnames(somm.pval.sand) <- nameP
  98. for(ttt in 1:length(nameT))
  99. {
  100. for(n in 1:length(nameN))
  101. {
  102. for(x in 1:length(nameP))
  103. {
  104. somm.beta[n,x,ttt] <- TheOutput[[n, x, ttt]]$resultats$coefficients[2,1]
  105. somm.pval[n,x,ttt] <- TheOutput[[n, x, ttt]]$resultats$coefficients[2,4]
  106. somm.pval.sand[n,x,ttt] <- TheOutput[[n, x, ttt]]$sandwich.coef.test[2,4]
  107. }
  108. }
  109. }
  110. #
  111. #
  112. # Write the data
  113. if ( length(nameT) == 1) {
  114. write.csv(rbind(somm.beta[,,1]), paste(path, betafile,sep = "/"))
  115. write.csv(rbind(somm.pval[,,1]), paste(path, pvaluefile,sep = "/"))
  116. if ( length(args)==6 ) {
  117. write.csv(rbind(somm.pval.sand[,,1]), paste(path, pvalsandfile,sep = "/"))
  118. }
  119. } else if( length(nameT) == 2) {
  120. write.csv(rbind(somm.beta[,,1],somm.beta[,,2]), paste(path, betafile,sep = "/"))
  121. write.csv(rbind(somm.pval[,,1],somm.pval[,,2]), paste(path, pvaluefile,sep = "/"))
  122. if ( length(args)==6 ) {
  123. write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2]), paste(path, pvalsandfile,sep = "/"))
  124. }
  125. } else if( length(nameT) == 3) {
  126. write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3]), paste(path, betafile,sep = "/"))
  127. write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3]), paste(path, pvaluefile,sep = "/"))
  128. if ( length(args)==6 ) {
  129. write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3]), paste(path, pvalsandfile,sep = "/"))
  130. }
  131. } else if( length(nameT) == 4) {
  132. write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4]), paste(path, betafile,sep = "/"))
  133. write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4]), paste(path, pvaluefile,sep = "/"))
  134. if ( length(args)==6 ) {
  135. write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4]), paste(path, pvalsandfile,sep = "/"))
  136. }
  137. } else if ( length(nameT) == 5) {
  138. write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4],somm.beta[,,5]), paste(path, betafile,sep = "/"))
  139. write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4],somm.pval[,,5]), paste(path, pvaluefile,sep = "/"))
  140. if ( length(args)==6 ) {
  141. write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4],somm.pval.sand[,,5]), paste(path, pvalsandfile,sep = "/"))
  142. }
  143. } else if( length(nameT) == 6) {
  144. write.csv(rbind(somm.beta[,,1],somm.beta[,,2],somm.beta[,,3],somm.beta[,,4],somm.beta[,,5],somm.beta[,,6]), paste(path, betafile,sep = "/"))
  145. write.csv(rbind(somm.pval[,,1],somm.pval[,,2],somm.pval[,,3],somm.pval[,,4],somm.pval[,,5],somm.pval[,,6]), paste(path, pvaluefile,sep = "/"))
  146. if ( length(args)==6 ) {
  147. write.csv(rbind(somm.pval.sand[,,1],somm.pval.sand[,,2],somm.pval.sand[,,3],somm.pval.sand[,,4],somm.pval.sand[,,5],somm.pval.sand[,,6]), paste(path, pvalsandfile,sep = "/"))
  148. }
  149. } else
  150. stop("Too many trends!")
  151. rm(list=ls())
  152. quit()