# edps 589
# Fall 2018
# c.j.anderson
#
#  Sods multinomial discrete choice models
#   o Chocolates that have only alternative (object) specific
#       (a) Using glm (i.e., fitting it as poisson regression model
#       (b) Using mlogit
#
#   o Brands and price data
#       (a) Using glm
#       (b) Using mlogit
#
#   o Modes of transportion
#       (a) Using mlogit
#


setwd("C:\\Users\\cja\\Dropbox\\edps 589\\8 Multicategory_logit")
#setwd("D:\\Dropbox\\edps 589\\8 Multicategory_logit")

library(mlogit)  # for bigger data set you might want to use mnlogit

######################################################################
# Chocolate data set                                                 #
######################################################################
choc <- read.table("chocolates_data.txt",header=TRUE)
head(choc)

#########################
# (a) glm               #
#########################
summary( glm.choc <- glm(choice ~ dark + soft + nuts, data=choc, family=poisson) )

beta <- glm.choc$coefficients

#For the probabilities:		 
pi0 <- exp(beta[1]+ beta[2]*choc$dark[1:8] +beta[3]*choc$soft[1:8] + beta[4]*choc$nuts[1:8])
(pi.hat <- pi0/sum(pi0))

# For easier of knowing what's what
probs <- cbind(choc[1:8,],pi.hat)
(pi.hat <- probs[order(probs$pi.hat),])


#######################
# (b) mlogit          #
#######################

# Creates a logical variable:  "TRUE" for chocolate picked
#                              "FALSE" otherwise
choc$pick <-  choc$choice==1

# Create strings for attributes of chocolates
choc$dark.s <- ifelse(choc$dark==1, "Dk","Mk")
choc$soft.s <- ifelse(choc$soft==1, "Sft","Hrd")
choc$nuts.s <- ifelse (choc$nuts==1, "Nut","Nnts")
	 
# concatenate strings to have a variable that describes each chocolate
choc$alt<- paste(choc$dark.s,choc$soft.s,choc$nuts.s,sep="_")

# To make mlogit happy
c <- mlogit.data(choc,shape="long",choice="pick", alt.var="alt")

# The model formula:  
#  (logical variable for choice ~   generic alternative specific | individual specific |  alternative specific )

fm <- mFormula(pick ~ dark + soft + nuts | 0 | 0) 

model <- mlogit(fm,c)
summary(model)
probs <- model$probabilities[1,]
probs <- sort(probs)

as.matrix(probs,nrow=8,ncol=1)


####################################################
#  Brands and Prices                               #
####################################################
                                   
####################
# (a) As poisson   #
####################								   
brands <- read.table("brands_price_data.txt",header=TRUE)
head(brands)


brands$combo <- as.factor(brands$combo)
brands$brand <- as.factor(brands$brand)

# ...and to match SAS output
brands <- within(brands, brand <- relevel(brand, ref = "5"))

#  In models, combo is just a nusiance variable        
# simple model

summary( mdc1 <- glm(choice ~ combo + brand + price, data=brands, family=poisson) )
anova(mdc1,test="LRT")

# with interaction
summary( mdc2 <- glm(choice ~ combo + brand + price + brand*price, data=brands, family=poisson) )
anova(mdc2,test="LRT")

# Test between them
anova(mdc1,mdc2)


beta <- glm.choc$coefficients
#For the probabilities:		 
pi0 <- exp(beta[1]+ beta[2]*choc$dark[1:8] +beta[3]*choc$soft[1:8] + beta[4]*choc$nuts[1:8])
(pi.hat <- pi0/sum(pi0))

# For easier of knowing what's what
probs <- cbind(choc[1:8,],pi.hat)
(pi.hat <- probs[order(probs$pi.hat),])


#################
# (b) mlogit    #
#################

# mlogit expects data at individual level
#   So here it is in format that I need 

longer <- read.table("brands_mdc_data.txt",header=TRUE)

# Choice to be logical
longer$choice <- longer$Y==1

longer$brand <- "brand5"
longer$brand[longer$brand1==1] <- "brand1"
longer$brand[longer$brand2==1] <- "brand2"
longer$brand[longer$brand3==1] <- "brand3"
longer$brand[longer$brand4==1] <- "brand4"

# Create alternative's description in terms of price and brand
longer$alternative <- paste(longer$brand,longer$price,sep="_")

# To make mlogit happy
long.m <- mlogit.data(longer,shape="long",choice="choice", alt.levels=c("A","B","C","D","E"), id="case")

# The model formula format:  
#  (logical variable for choice ~   generic alternative specific | individual specific |  alternative specific )

# main effects
fm1 <- mFormula(choice ~ brand1 + brand2 + brand3 + brand4 + price | 0 | 0 ) 

summary(model.1<- mlogit(fm1,long.m) )

# Estimated odds ratios
(exp.beta <- exp(model.1$coefficients[1:5]) )

# w/ interaction
fm2 <- mFormula(choice ~ brand1 + brand2 + brand3 + brand4 + price + brand1*price + brand2*price + brand3+price + brand4*price | 0 | 0 ) 

summary(model.2<- mlogit(fm2,long.m) )


###############################################################
# Transportation    --- mlogit                                #
###############################################################
# Data from Powers & Xie
#
# Variables used are:
#  mode= Mode of transportation chosen    first=train, 2nd=bus, 3rd=car
#  ttime= Time in terminal   
#  invc= Time in vehicle 
#  gc = Generalized cost 
#  hinc = Household income 
#  id = person id

trans <- read.table("transportation_data.txt",header=TRUE)

#n <- length(unique(trans$id))
#trans$alt <- rep(c("Bus","Train","Car"),n) 


# To make mlogit happy
tm <- mlogit.data(trans,shape="long",choice="mode", alt.levels=c("Train","Bus","Car"),id="id")
#  (logical variable for choice ~   generic alternative specific | individual specific |  alternative specific )

# Attributes of modes of transportation 
fm.a <- mFormula(mode ~ ttme + invc + invt + gc | 0  | 0) 
summary(model.a <- mlogit(fm.a,tm))


# Attributes of modes of transportation AND individual specific
# This is reported in notes:

fm.b <- mFormula(mode ~ ttme  + invc + invt + gc | hinc  | 0 ) 
summary(model.a <- mlogit(fm.b,tm))