# Intro to glm: difference between T-cells counts in Hodgkins and non-Hodgkins # Edps 589 # Fall 2018 # C.J.Anderson # # Notes: R/glm uses iterative weighted least squares or Fisher scoring algorithm; that is, it uses # the EXPECTED Hessian matrix (i.e., information or matrix of 2nd partial derivatives). # # SAS/GENMOD uses (ridge stabilized) Newton-Raphson algorthim; that is, it uses the # OBSERVED Hessian matrix. # # The 2 algorithms are essentaly the same except for how the Hessian is computed. # # The estimated parameters should be the same, but the estimated standard errors (and covariance # matrix) of parameter estimates will differ a bit. # library(MASS) # needed for negatiave binomial glm #setwd("D:\\Dropbox\\edps 589\\4 glm") setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 589\\4 glm") h <- read.table("hodgkins_data.txt",header=TRUE) # normal (default link is identity)...I added it to show how to include it. # The scale parameter is sigma normal.0 <- glm(count ~ 1,data=h,family=gaussian(link="identity")) summary(normal.0) normal.1 <- glm(count ~ disease,data=h,family=gaussian(link="identity")) summary(normal.1) # Normal with log link normal.0 <- glm(count ~ 1,data=h,family=gaussian(link="log")) summary(normal.0) normal.1 <- glm(count ~ disease,data=h,family=gaussian(link="log")) summary(normal.1) # Gamma (default link is inverse) gamma.0 <- glm(count ~ 1,data=h,family=Gamma) summary(gamma.0) gamma.1 <- glm(count ~ 1 + disease,data=h,family=Gamma) summary(gamma.1) invGauss.0 <- glm(count ~ 1,data=h,family=inverse.gaussian) summary(invGauss.0) invGauss.1 <- glm(count ~ disease,data=h,family=inverse.gaussian) summary(invGauss.1) # poisson # Scale parameter = 1 poisson.0 <- glm(count ~ 1,data=h,family=poisson) summary(poisson.0) poisson.1 <- glm(count ~ disease,data=h,family=poisson) summary(poisson.1) # negbin # Scale parameter = k = dispersion is k. SAS estimates 1/k) negbin.0 <- glm.nb(count ~ 1,data=h) summary(negbin.0) negbin.1 <- glm.nb(count ~ disease,data=h) summary(negbin.1) # Negative binomial is much better relative to poisson? anova(poisson.1,negbin.1) # Why? Show overdispersion in data aggregate(count ~ disease, data=h, FUN="mean") aggregate(count ~ disease, data=h, FUN="var")