# edps 589 # Fall 2018 # c.j.anderson # # baseline multinomial logit: HSB # # library(nnet) # for baseline multinomial logistic regression library(dplyr) # for select function library(tidyr) # for converting from wide to long format setwd("C:\\Users\\cja\\Dropbox\\edps 589\\8 Multicategory_logit") #setwd("D:\\Dropbox\\edps 589\\8 Multicategory_logit") hsb <- read.table("hsb_data.txt",header=TRUE) #################################################### # Model 1: Simple # #################################################### hsb$hsp <- as.factor(hsb$hsp) summary( mlogit <- multinom(hsp ~ achieve, data = hsb) ) # Change the reference category to VoTech, which is hsp=3. # This will give same parameters as are in the lecture notes hsb$hsp <- relevel(hsb$hsp, ref = "3") summary( mlogit <- multinom(hsp ~ achieve, data = hsb) ) # estimated odds ratios for 1 unit change in achieve: # -- general vs votech exp(0.0599778) # -- academic vs votech exp(0.16985890) # -- general vs acahdemic exp(0.0599778-0.16985890) # estimated odds ratios for 10 unit change in achievement: # -- general vs votech exp(0.0599778*10) # -- academic vs votech exp(0.16985890*10) # -- general vs acahdemic exp((0.0599778-0.16985890)*10) names(mlogit) # same fitted probabilites head(mlogit$fitted) hsb$fitV <- mlogit$fitted[,1] hsb$fitG <- mlogit$fitted[,2] hsb$fitA <- mlogit$fitted[,3] hsb <- hsb[order(hsb$achieve),] # What probabilities look like plot(hsb$achieve,hsb$fitV, type="l",lwd=2,col="blue", ylim=c(0,1), ylab="Fitted Probabilities", xlab="Achievement", main="Fitted Probabilities from Baseline Model") lines(hsb$achieve,hsb$fitG, type="l",lwd=2,col="green") lines(hsb$achieve,hsb$fitA, type="l",lwd=2,col="red") legend("topleft",legend=c("P(VoTech)","P(General)", "P(Academic)"), col=c("blue","green","red"), lwd=2) ########################################### # Add SES as nominal and then as ordinal # # explanatory varaible # ########################################### # -- SES nominal hsb$nses <- as.factor(hsb$ses) hsb$nses <- relevel(hsb$nses, ref = "3") summary(mlogit2 <- multinom(hsp ~ achieve + nses, data=hsb)) # --- some differences between this and SAS (even with # same reference categories; however, the deviances # between SAS and R match. # z=sqrt(wald) statistics for parameters (z <- summary(mlogit2)$coefficients/summary(mlogit2)$standard.errors) (p <- (1 - pnorm(abs(z), 0, 1)) * 2) # -- SES ordinal summary(mlogit3 <- multinom(hsp2 ~ achieve + ses, data=hsb)) # -- NOTE: anova does not work with multinom objects # -- test of SES nominal vs ordinal (lrt <- mlogit3$deviance -mlogit2$deviance) 1-pchisq(lrt,2) # -- test achieve conditional on SES ordinal (lrt <- mlogit$deviance -mlogit3$deviance) 1-pchisq(lrt,2) # z=sqrt(wald) statistics for parameters (z <- summary(mlogit3)$coefficients/summary(mlogit3)$standard.errors) (p <- (1 - pnorm(abs(z), 0, 1)) * 2) ############################################ # Fit model using long format and glm as a # # poisson regression # ############################################ hsb$id <- as.factor( seq(1:length(hsb$ses) )) t <- table(hsb$id,hsb$hsp) hsb.hsp <- as.data.frame(t) names(hsb.hsp) <- c("id","program","y") long <- merge(hsb, hsb.hsp, by=c("id")) long$id <- as.factor(long$id) long$program <- as.factor(long$program) # -- take a little longer but works fine summary(long.mod <- glm(y ~ id + program + program*achieve, data=long, family=poisson)) ############################################ # Test whether slopes can be same # ############################################ ## Method 1: create a new variable that equates ## the slopes for general and votech # no restrictions summary( logit.3slopes <- glm(y ~ id + program + program*achieve, data=long, family=poisson)) # restrictions long$program2 <- ifelse(long$program=="2", 0, 1) table(long$program2,long$program) summary( logit.2slopes <- glm(y ~ id + program + program2*achieve, data=long, family=poisson)) anova(logit.2slopes,logit.3slopes,test="LRT") ## Method 2: Sub-set data and just run logistic ## regression of binary data hsb.VG <- sample(hsb[(hsb$hsp != "2"),]) summary(VGmod <- glm(hsp ~ achieve,data=hsb.VG,family=binomial)) anova(VGmod,test="LRT")