# edps 589 # Fall 2018 # c.j.anderson # # Sods multinomial discrete choice models # o Chocolates that have only alternative (object) specific # (a) Using glm (i.e., fitting it as poisson regression model # (b) Using mlogit # # o Brands and price data # (a) Using glm # (b) Using mlogit # # o Modes of transportion # (a) Using mlogit # setwd("C:\\Users\\cja\\Dropbox\\edps 589\\8 Multicategory_logit") #setwd("D:\\Dropbox\\edps 589\\8 Multicategory_logit") library(mlogit) # for bigger data set you might want to use mnlogit ###################################################################### # Chocolate data set # ###################################################################### choc <- read.table("chocolates_data.txt",header=TRUE) head(choc) ######################### # (a) glm # ######################### summary( glm.choc <- glm(choice ~ dark + soft + nuts, data=choc, family=poisson) ) beta <- glm.choc$coefficients #For the probabilities: pi0 <- exp(beta[1]+ beta[2]*choc$dark[1:8] +beta[3]*choc$soft[1:8] + beta[4]*choc$nuts[1:8]) (pi.hat <- pi0/sum(pi0)) # For easier of knowing what's what probs <- cbind(choc[1:8,],pi.hat) (pi.hat <- probs[order(probs$pi.hat),]) ####################### # (b) mlogit # ####################### # Creates a logical variable: "TRUE" for chocolate picked # "FALSE" otherwise choc$pick <- choc$choice==1 # Create strings for attributes of chocolates choc$dark.s <- ifelse(choc$dark==1, "Dk","Mk") choc$soft.s <- ifelse(choc$soft==1, "Sft","Hrd") choc$nuts.s <- ifelse (choc$nuts==1, "Nut","Nnts") # concatenate strings to have a variable that describes each chocolate choc$alt<- paste(choc$dark.s,choc$soft.s,choc$nuts.s,sep="_") # To make mlogit happy c <- mlogit.data(choc,shape="long",choice="pick", alt.var="alt") # The model formula: # (logical variable for choice ~ generic alternative specific | individual specific | alternative specific ) fm <- mFormula(pick ~ dark + soft + nuts | 0 | 0) model <- mlogit(fm,c) summary(model) probs <- model$probabilities[1,] probs <- sort(probs) as.matrix(probs,nrow=8,ncol=1) #################################################### # Brands and Prices # #################################################### #################### # (a) As poisson # #################### brands <- read.table("brands_price_data.txt",header=TRUE) head(brands) brands$combo <- as.factor(brands$combo) brands$brand <- as.factor(brands$brand) # ...and to match SAS output brands <- within(brands, brand <- relevel(brand, ref = "5")) # In models, combo is just a nusiance variable # simple model summary( mdc1 <- glm(choice ~ combo + brand + price, data=brands, family=poisson) ) anova(mdc1,test="LRT") # with interaction summary( mdc2 <- glm(choice ~ combo + brand + price + brand*price, data=brands, family=poisson) ) anova(mdc2,test="LRT") # Test between them anova(mdc1,mdc2) beta <- glm.choc$coefficients #For the probabilities: pi0 <- exp(beta[1]+ beta[2]*choc$dark[1:8] +beta[3]*choc$soft[1:8] + beta[4]*choc$nuts[1:8]) (pi.hat <- pi0/sum(pi0)) # For easier of knowing what's what probs <- cbind(choc[1:8,],pi.hat) (pi.hat <- probs[order(probs$pi.hat),]) ################# # (b) mlogit # ################# # mlogit expects data at individual level # So here it is in format that I need longer <- read.table("brands_mdc_data.txt",header=TRUE) # Choice to be logical longer$choice <- longer$Y==1 longer$brand <- "brand5" longer$brand[longer$brand1==1] <- "brand1" longer$brand[longer$brand2==1] <- "brand2" longer$brand[longer$brand3==1] <- "brand3" longer$brand[longer$brand4==1] <- "brand4" # Create alternative's description in terms of price and brand longer$alternative <- paste(longer$brand,longer$price,sep="_") # To make mlogit happy long.m <- mlogit.data(longer,shape="long",choice="choice", alt.levels=c("A","B","C","D","E"), id="case") # The model formula format: # (logical variable for choice ~ generic alternative specific | individual specific | alternative specific ) # main effects fm1 <- mFormula(choice ~ brand1 + brand2 + brand3 + brand4 + price | 0 | 0 ) summary(model.1<- mlogit(fm1,long.m) ) # Estimated odds ratios (exp.beta <- exp(model.1$coefficients[1:5]) ) # w/ interaction fm2 <- mFormula(choice ~ brand1 + brand2 + brand3 + brand4 + price + brand1*price + brand2*price + brand3+price + brand4*price | 0 | 0 ) summary(model.2<- mlogit(fm2,long.m) ) ############################################################### # Transportation --- mlogit # ############################################################### # Data from Powers & Xie # # Variables used are: # mode= Mode of transportation chosen first=train, 2nd=bus, 3rd=car # ttime= Time in terminal # invc= Time in vehicle # gc = Generalized cost # hinc = Household income # id = person id trans <- read.table("transportation_data.txt",header=TRUE) #n <- length(unique(trans$id)) #trans$alt <- rep(c("Bus","Train","Car"),n) # To make mlogit happy tm <- mlogit.data(trans,shape="long",choice="mode", alt.levels=c("Train","Bus","Car"),id="id") # (logical variable for choice ~ generic alternative specific | individual specific | alternative specific ) # Attributes of modes of transportation fm.a <- mFormula(mode ~ ttme + invc + invt + gc | 0 | 0) summary(model.a <- mlogit(fm.a,tm)) # Attributes of modes of transportation AND individual specific # This is reported in notes: fm.b <- mFormula(mode ~ ttme + invc + invt + gc | hinc | 0 ) summary(model.a <- mlogit(fm.b,tm))