# edps 589 # Fall 2018 # c.j.anderson # # Proportional odds & basline for high school and beyond data # o Recoding # o Proportional odds model (4 different methods) # (note I also did baseline using vglm) # o Graphing # o Baseline model for comparision # # # for baseline multinomial logistic regression library(nnet) # for proportional odds model library(MASS) # plor library(VGAM) # vglm library(rms) # lrm library(ordinal) # clm library(car) # Anova (note captial "A") library(dplyr) # for select function #setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 589\\8 Multicategory_logit") setwd("D:\\Dropbox\\edps 589\\8 Multicategory_logit") hsb <- read.table("hsb_data.txt",header=TRUE) # Re-code hsp so that it corresponds to what is in the notes: hsb$hsprog <- 0 hsb$hsprog[hsb$hsp==2] <- 1 hsb$hsprog[hsb$hsp==1] <- 2 hsb$hsprog[hsb$hsp==3] <- 3 # But I would rather the shorted name: hsp # ...and I need one as factor and other as numeric # (which we need, depends on the package) hsb$hsp <- hsb$hsprog ##################################### # Proportional odds model # # (a) clm from ordinal package # # (b) polr from MASS package # # (c) lrm from the rms package # # (d) vglm from the VGAM package # ##################################### ###### # (a) clm from ordinal package ###### # response has to be a factor hsb$hsp <- as.factor(hsb$hsp) po <- clm(hsp ~ achieve, data=hsb) anova(po) # test proportional odds assumption (before really looking at model) # --- the test statistics is in column label "LRT" nominal_test(po) # Not let's look at parameters summary(po) # Fitted probabilities for each type of hsp: hsb$po.fit <- po$fitted ###### # (b) polr from MASS package ###### # response has to be a factor hsb$hsp <- as.factor(hsb$hsp) summary(po.polr <- polr(hsp ~ achieve, data=hsb,Hess=TRUE ) # See what's available (check manual for defintions of these names(po.polr) # calculate and store p values ctable <- coef(summary(po.polr)) p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 # combined table of coefficeints, se, t and pvalues (ctable <- cbind(ctable, "p value" = p)) # default method gives profiled CIs (ci <- confint(po.polr, level=.99) # odds ratios exp(coef(po.polr)) fit <- po.polr$fitted.values ###### # (c) lrm from the rms package # ###### # I'm not sure how to use this one (yet) ###### # (d) vglm from the VGAM package ####### # Note: response should be numeric (ordered) summary( po.vglm1 <- vglm(hsprog ~ achieve,family=cumulative(parallel=TRUE), data=hsb) ) summary( po.vglm2 <- vglm(hsprog ~ achieve,family=cumulative(parallel=FALSE), data=hsb) ) # Difference in deviances of these two models (same as given by SAS): lr <- 1082.413 - 1081.608 # p for testing proportional odds assumption 1 - pchisq(lr,1) # Compare with summary( multinom.vglm <- vglm(hsprog ~ achieve,family=multinomial, data=hsb) ) ############################################################################ # Graphs ############################################################################ # get just the academic academ <- subset(filter(hsb, hsp==1),select=c("hsp","achieve","po.fit")) j <- order(academ$achieve) plot(academ$achieve[j],academ$po.fit[j], type='l', lwd=2, col="red", ylim=c(0,1), ylab="Predicted probability", xlab="Achievement Scores", main="Proportional Odds Model fit to HSB Data" ) general <- subset(filter(hsb, hsp==2),select=c(hsp,achieve,po.fit)) j <- order(general$achieve) lines(general$achieve[j],general$po.fit[j],col="green", lwd=2) votech <- subset(filter(hsb, hsp==3),select=c(hsp,achieve,po.fit)) j <- order(votech$achieve) lines(votech$achieve[j],votech$po.fit[j],col="blue", lwd=2) legend("topleft",inset=.02,legend=c("Academic","General","VoTech"), col=c("red","green","blue"), lwd=2, cex=1) # Fitted cumulative probabilities ag <- rbind(academ,general) ag$pAG <- exp(-5.547988 +0.1330308*ag$achieve)/(1+exp(-5.547988 +0.1330308*ag$achieve)) j <- order(academ$achieve) plot(academ$achieve[j],academ$po.fit[j], type='l', lwd=2, col="blue", ylim=c(0,1), ylab="Predicted Cumulative probability", xlab="Achievement Scores", main="Proportional Odds Model fit to HSB Data" ) j <- order(ag$achieve) lines(ag$achieve[j],ag$pAG[j],col="red", lwd=2) legend("topleft",inset=.02,legend=c("P(academic or general)","P(academic)"), col=c("red","blue"), lwd=2, cex=1) ############################################################# # Baseline # ############################################################# # But I would rather the shorted name: hsp hsb$hsp <- hsb$hsprog ## This is taking votech as baseline summary(base1 <- multinom(hsp ~ achieve, data = hsb) ) # z=sqrt(wald) statistics for parameters (z <- summary(base1)$coefficients/summary(base1)$standard.errors) (p <- (1 - pnorm(abs(z), 0, 1)) * 2) # estimated odds ratios for 1 unit change in achieve: # -- general vs votech exp(0.0599778) # -- academic vs votech exp(0.16985890) # -- general vs acahdemic exp(0.0599778-0.16985890) # estimated odds ratios for 10 unit change in achievement: # -- general vs votech exp(0.0599778*10) # -- academic vs votech exp(0.16985890*10) # -- general vs academic exp((0.0599778-0.16985890)*10)