# Intro to glm poisson regression: Agresti's Crab data # This won't match the results in lecture because I did things a bit different # (i.e., fit model to uncollapsed data, graphs, models with and without outliers, # different graphs). # Edps 589 # Fall 2018 # C.J.Anderson # # Started with looking at data # Fit models to data with and without outlier # Plot results # (to get groups of data try the "cut( )" function # library(MASS) setwd("D:\\Dropbox\\edps 589\\4 glm") crabs <- read.table("crab_data.txt",header=TRUE) # Rescale weight varable crabs$weigh = crabs$weight/1000 # Because I don't want to keep adding "crabs" in front of variables... attach(crabs) par(mfrow=c(2,1)) # Take a look at data with smooth curve plot(weigh,satell, type='p',pch=20,cex=1.5, main="Wether Crab Has Any Satellites vs Weight", xlab="Weight/1000 of Female", ylab="Number of Satellites", xlim=c(0,5) ) smooth <- loess(satell~weigh) j <- order(crabs$weigh) lines(weigh[j],smooth$fitted[j],col="blue",lwd=3) text(5,7.75,"Outlier?",col="red") # Remove the outlier and see what happens to loess crabs.sub <- subset(crabs,weigh<4.5) # Take a look at data with smooth curve plot(crabs.sub$weigh,crabs.sub$satell, type='p',pch=20,cex=1.5, main="Wether Crab Has Any Satellites vs Weight: NO OUTLIER", xlab="Weight/1000 of Female", ylab="Number of Satellites", xlim=c(0,5) ) smooth <- loess(crabs.sub$satell~crabs.sub$weigh) j <- order(crabs.sub$weigh) lines(crabs.sub$weigh[j],smooth$fitted[j],col="blue",lwd=3) text(5,7.75,"Outlier?",col="red") # Model with outlier mod.poi1 <- glm(satell ~ weigh,data=crabs, family=poisson(link="log")) summary(mod.poi1) anova(mod.poi1) # "Deviance" is likelihood ratio test for weigh # Model with outlier mod.poi2 <- glm(satell ~ weigh,data=crabs.sub, family=poisson(link="log")) summary(mod.poi2) anova(mod.poi2) par(mfrow=c(1,1)) # Take a look at data with smooth curve, fitted with and without outlier plot(weigh,satell, type='p',pch=20,cex=1.5, main="Whether Crab Has Any Satellites vs Weight", xlab="Weight/1000 of Female", ylab="Number of Satellites", xlim=c(0,5) ) text(5,7.75,"Outlier?",col="red") smooth <- loess(satell~weigh) j <- order(crabs$weigh) lines(weigh[j],smooth$fitted[j],col="blue",lwd=3) fit1 <- fitted(mod.poi1) lines(weigh[j],fit1[j],lwd=3,col="green") fit2 <- fitted(mod.poi2) k <- order(crabs.sub$weigh) lines(crabs.sub$weigh[k],fit2[k],col="red",lwd=3) legend("topleft",legend=c("Loess","Poisson","95% confidence","Poisson/no outlier","95% confidence"),col=c("blue","green","green","red","red"),lty=c(1,1,1)) # To get 95% bands on line w/o outlier test <- predict(mod.poi2, newdata=NULL, type=c("response"), se.fit=TRUE) names(test) upper <- test$fit + 1.96*test$se.fit lower <- test$fit - 1.96*test$se.fit polygon(c(crabs.sub$weigh[k],rev(crabs.sub$weigh[k])) , c(upper[k],rev(lower[k])), col=rgb(1,0,0,.3), border=NA ) # to get 95% bands on model w/ outliers test <- predict(mod.poi1, newdata=NULL, type=c("response"), se.fit=TRUE) names(test) upper <- test$fit + 1.96*test$se.fit lower <- test$fit - 1.96*test$se.fit polygon(c(weigh[j],rev(weigh[j])) , c(upper[j],rev(lower[j])), col=rgb(0,1,0,.3), border=NA )