# Intro to glm count data: Death due to aids # Edps 589 # Fall 2018 # C.J.Anderson # # Fit loess and a few poissons # Most of code deals with producing nice graphs # setwd("D:\\Dropbox\\edps 589\\4 glm") aids <- read.table("death_due_to_AID_data.txt",header=TRUE) aids$log.month <- log(aids$month) # plot of data and loess curve plot(aids$month,aids$count, type='p', pch=20, cex=1.75, main="Data: Death Due to Aids", xlab="3 Month Period", ylab="Number of Deaths" ) smooth <- loess(aids$count ~aids$month) lines(aids$month,smooth$fitted,col="purple") legend("topleft",legend=c("Data","Loess Curve"), col=c("black","purple"), lwd=2, lty=c(0,1),pch=c(20,NULL)) # Poisson Regression model, identity link --- total fails poi0 <- glm(count ~ month, data=aids, family=poisson(link="identity")) summary(poi0) # Poisson Regresson poi1 <- glm(count ~ month,data=aids,family=poisson(link="log")) summary(poi1) # --- add to plot # plot of data and loess curve plot(aids$month,aids$count, type='p', pch=20, cex=1.75, main="Data: Death Due to Aids", xlab="3 Month Period", ylab="Number of Deaths" ) smooth <- loess(aids$count ~aids$month) lines(aids$month,poi1$fitted,col="cyan",lwd=2) lines(aids$month,smooth$fitted,col="purple") legend("topleft",legend=c("Data","Loess Curve","Poisson (count~month)"), col=c("black","purple","cyan"), lwd=2, lty=c(0,1,1),pch=c(20,NULL,NULL)) # Try log(month) aids$log.month <- log(aids$month) poi2 <- glm(count ~ log.month,data=aids,family=poisson(link="log")) summary(poi2) plot(aids$log.month,aids$count,pch=20,col="black", main="Data and Fitted by Log(Month)" ) lines(aids$log.month,poi1$fitted,col="cyan") lines(aids$log.month,poi2$fitted,col="green") smooth2 <- loess(aids$count~ aids$log.month) lines(aids$log.month,smooth2$fitted,col="purple") legend("topleft",legend=c("Data","Loess","Poisson (count~month)","Poisson (count~log.month)"), col=c("black","purple","cyan","green"), lty=c(0,1,1,1), pch=c(20,NULL,NULL,NULL)) # Add in 95% CI for 2nd poisson model poi2.fit <- poi2$fitted # From 1st principles because I had hard time doing it otherwise # -- Using what I know about distributions of linear combintations of random variables S <- vcov(poi2) # Covariance matrix of estimated parameters one.vec <- matrix(1,nrow=14,ncol=1) # need a vector of ones (for intercept) lnmonth <- as.matrix(aids$log.month) # need predictor as vector too L <- cbind(one.vec,lnmonth) # Linear combination matrix (row define them) se <- sqrt(diag( L %*% S %*% t(L))) # A bit of linear algebra--- se of linear predictor eta <- log(poi2.fit) # Dealing with the link upper <- exp(eta + 1.96*se) lower <- exp(eta - 1.96*se) plot(aids$log.month,aids$count,pch=20,col="black",type="p", main="Data and Fitted by Log(Month)" ) col2rgb("green") # get color codes for shading polygon(c(aids$log.month,rev(aids$log.month)) , c(upper,rev(lower)), col=rgb(0,1,0,.2), border=NA ) lines(aids$log.month,poi1$fitted,col="cyan") lines(aids$log.month,poi2$fitted,col="green") smooth2 <- loess(aids$count~ aids$log.month) lines(aids$log.month,smooth2$fitted,col="purple") legend("topleft",legend=c("Data","Loess","Poisson (count~month)","Poisson (count~log.month)","95% bands"), col=c("black","purple","cyan","green","green"), lty=c(0,1,1,1), pch=c(20,NULL,NULL,NULL))