# Logistic regression (more on diagnositics # esr example # Edps 589 # Fall 2018 # c.j.anderson # # I just copied from R for hsb example library(data.table) # maybe I don't need this library(dplyr) library(MASS) # needed for negatiave binomial glm library(vcd) library(vcdExtra) library(ggplot2) library(ResourceSelection) # hosmer lemshow statistic library(pROC) # ROC curve/data library(DescTools) # concordance index library(car) # influence diagnostics ######################################################## # Set up and read in data # ######################################################## setwd("D:\\Dropbox\\edps 589\\5 logistic regression") #setwd("C:\\Users\\cja\\Dropbox\\edps 589\\5 logistic regression") #setwd("C:\\Users\\cja\\Dropbox\\edps 589\\5 logistic regression") esr <- read.table(file="esr_data.txt", header=TRUE) names(esr) head(esr) summary(model.fg <- glm(response ~ fibrin + globulin, data=esr, family=binomial)) summary(model.g <- glm(response ~ globulin, data=esr, family=binomial)) anova(model.g,model.fg) summary(model.f <- glm(response ~ fibrin , data=esr, family=binomial)) anova(model.f,model.fg) # See what is available. (infl.f <- influence.measures(model.f)) influencePlot(model.f, id.col="red", scale=8, id.cex=1.5, id.n=6) influenceIndexPlot(model.f, vars=c("Cook","Studentized","hat"), id.n=5) # plot dfbeta for fibrin # -- set up data frame dfbetas <- data.frame(infl.f$infmat[,1:2]) # -- set up plot layout op <- par(mar=c(5,5,1,1) + 1) colors <- ifelse(esr$response==1, "red","blue") plot(dfbetas[,2], type='h', col=colors, xlab="Index of Observations", ylab=expression(paste("Delta + ",beta,"(fibrin)")), cex.lab=1.5, ylim=c(-.625,.625), main="Dfbeta for Fibrin for individuals" ) points(dfbetas[,2],col=colors) abline(h=c(-0.25,0,0.25), col="grey") # -- labeling the big ones is a pain bigOnes <- abs(dfbetas[,2])>0.25 positions <- ifelse(dfbetas[bigOnes,2]>0, 3, 1) i <- 1:length(bigOnes) text(i[bigOnes],dfbetas[bigOnes,2], label=rownames(dfbetas)[bigOnes], cex=.7, pos=positions, xpd=TRUE) par(op)