# Edps 590BAY ---- post online ------
# Spring 2018
# C.J. Anderson
#
# Multilevel logistic regression (i.e., the Rasch model in disguise, which is
# not the best model for these data -- we'll do more IRT in the next lecture).
#
# Example using 2004 GSS vocabulary items and highest degree earned. I pulled
# this from the data set I used for a chapter on Categorical Data Analysis
# with a Psychometric Twist, which was published in the Handbook of
# Quantitative Psychology edited by Roger Millsap & Albert Maydeu-Olivares.
# I put the data for class on the course web-site.
#
# The sample size is large (~1155), which makes the long file very long.
# The model takes time to run in JAGS; it is fine in glmer and nlme. You
# could take a sub-sample of the data if you want to run JAGS without tying
# up your computer for a long time.
#
# Contents:
#   glmer: rasch1: Rasch via Laplace approximation
#          rasch2: Rasch via adaptive quadrature
#
#   nlme:  rasch3: Yet another way to fit the Rasch model
#          rasch4: Rasch with highest degree earned as a predictor of
#                  latent vocabulary knowledge
#
#   Graph: ICC curves using parameters from rasch2
#
#   JAGS:  multilevel logistic regression <- takes a long time
#
library(lme4)
library(nlme)      # needed for nlme() (rasch3 and rasch4 below)
library(rjags)
library(runjags)
library(coda)

setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\10 Multilevel models")

vocab <- read.table("vocab_data_long.txt", header=TRUE)
vo5 <- as.data.frame(cbind(vocab$id, vocab$y, vocab$A, vocab$B, vocab$C,
                           vocab$D, vocab$E))
names(vo5) <- c("id", "y", "A", "B", "C", "D", "E")

# If your data are in the "wide" format, see the following for how to transform:
# https://rstudio-pubs-static.s3.amazonaws.com/78961_fe5b5c6a77f446eca899afbb32bd1dc7.html

# Only 5 items -- Rasch via Laplace approximation
rasch1 <- glmer(y ~ -1 + A + B + C + D + E + (1 | id),
                data=vo5, family=binomial)
summary(rasch1)

# Only 5 items -- Rasch via adaptive quadrature (nAGQ = 10 quadrature points)
rasch2 <- glmer(y ~ -1 + A + B + C + D + E + (1 | id),
                data=vo5, family=binomial, nAGQ=10)
summary(rasch2)

######################### RASCH as glmer and nlme #########################
# Rasch via nlme: the item response function for the 1PL (Rasch) model.
# The item indicators A--E pick out the difficulty for the current item.
onePL <- function(b1, b2, b3, b4, b5, theta) {
  b <- b1*A + b2*B + b3*C + b4*D + b5*E
  exp(theta - b)/(1 + exp(theta - b))
}

# Note: the response is y (lower case) and there are only five item
# parameters, b1--b5.
rasch3 <- nlme(y ~ onePL(b1, b2, b3, b4, b5, theta),
               data=vocab,
               fixed= b1 + b2 + b3 + b4 + b5 ~ 1,
               random= theta ~ 1 | id,
               start=c(b1=1, b2=1, b3=1, b4=1, b5=1))
summary(rasch3)

# Rasch with explanatory variables for theta. The regression coefficients
# v1--v3 are passed as arguments so that nlme can see and estimate them.
onePL <- function(b1, b2, b3, b4, b5, v1, v2, v3, epsilon) {
  b <- b1*A + b2*B + b3*C + b4*D + b5*E
  theta <- v1*age + v2*elementary + v3*hsplus + epsilon
  exp(theta - b)/(1 + exp(theta - b))
}

rasch4 <- nlme(y ~ onePL(b1, b2, b3, b4, b5, v1, v2, v3, epsilon),
               data=vocab,
               fixed= b1 + b2 + b3 + b4 + b5 + v1 + v2 + v3 ~ 1,
               random= epsilon ~ 1 | id,
               start=c(b1=1, b2=1, b3=1, b4=1, b5=1, v1=.1, v2=.1, v3=.1))
summary(rasch4)

# Plot of item characteristic curves using the item parameter estimates from
# rasch2. The computations are vectorized over theta, so no loop is needed.
theta <- seq(-5, 5, by=.1)
pa <- 1/(1 + exp(-(theta + 1.80384)))
pb <- 1/(1 + exp(-(theta + 3.08115)))
pc <- 1/(1 + exp(-(theta - 1.40359)))
pd <- 1/(1 + exp(-(theta + 3.27740)))
pe <- 1/(1 + exp(-(theta + 1.94476)))

plot(theta, pa, type="l",
     main="5 Vocabulary Item Characteristic Curves: Rasch",
     xlab="Theta (ability, knowledge, etc.)",
     ylab="P(item = correct)",
     col="blue", lwd=2)
lines(theta, pb, col="red",    lwd=2)
lines(theta, pc, col="green",  lwd=2)
lines(theta, pd, col="cyan",   lwd=2)
lines(theta, pe, col="orange", lwd=2)
legend(-4.85, 1.0, legend=c("A", "B", "C", "D", "E"), title="Item",
       col=c("blue", "red", "green", "cyan", "orange"), lty=1, cex=0.8)
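# As noted in the header, JAGS is slow with the full long file. Below is a
# minimal sketch (not part of the original analysis) of one way to sub-sample
# before the JAGS section: draw a random subset of respondents and keep all
# of their item responses. The name vo5.sub and the size 300 are illustrative
# choices.
set.seed(590)                                  # for reproducibility
sub.ids <- sample(unique(vo5$id), 300)         # 300 randomly chosen persons
vo5.sub <- vo5[vo5$id %in% sub.ids, ]          # all 5 items for those persons
vo5.sub$id <- as.numeric(factor(vo5.sub$id))   # re-index ids as 1,...,300 for JAGS
# To use the sub-sample, build dataList below from vo5.sub instead of vo5.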
########### JAGS multilevel logistic regression ##############
dataList <- list(
  id  = vo5$id,
  y   = vo5$y,
  A   = vo5$A,
  B   = vo5$B,
  C   = vo5$C,
  D   = vo5$D,
  E   = vo5$E,
  n   = length(vo5$y),
  Nid = length(unique(vo5$id))
)

logreg1 <- "model {
  for (i in 1:n) {
    y[i] ~ dbern(p[i])
    p[i] <- 1/(1 + exp(-eta[i]))
    eta[i] <- theta[id[i]] + ba*A[i] + bb*B[i] + bc*C[i] + bd*D[i] + be*E[i]
  }
  for (j in 1:Nid) {
    theta[j] ~ dnorm(0, ptau)
  }
  ptau ~ dgamma(0.01, 0.01)   # precision of the person distribution
  tau <- 1/sqrt(ptau)         # standard deviation of theta
  ba ~ dnorm(0, 1/1000)
  bb ~ dnorm(0, 1/1000)
  bc ~ dnorm(0, 1/1000)
  bd ~ dnorm(0, 1/1000)
  be ~ dnorm(0, 1/1000)
}"
writeLines(logreg1, con="logreg1.txt")

################### check whether model compiles ##################
start1 <- list("ba"=2, "bb"=3.0, "bc"=-1.5, "bd"=3.0, "be"=2.0, "ptau"=.001)

logreg1.chk <- run.jags(model=logreg1,
                        sample=100,
                        data=dataList,
                        inits=start1,
                        monitor=c("ba", "bb", "bc", "bd", "be", "tau"),
                        n.chains=1)
print(logreg1.chk)
# If print doesn't work, use
# add.summary(logreg1.chk)
plot(logreg1.chk)

###################################################################
start1 <- list("ba"=2, "bb"=3.0, "bc"=-1.5, "bd"=3.0, "be"=2.0,
               .RNG.name="base::Wichmann-Hill", .RNG.seed=333)
start2 <- list("ba"=rnorm(1,0,4), "bb"=rnorm(1,0,4), "bc"=rnorm(1,0,4),
               "bd"=rnorm(1,0,4), "be"=rnorm(1,0,4),
               .RNG.name="base::Marsaglia-Multicarry", .RNG.seed=24)
start3 <- list("ba"=rnorm(1,1,4), "bb"=rnorm(1,1,4), "bc"=rnorm(1,1,4),
               "bd"=rnorm(1,0,4), "be"=rnorm(1,0,4),
               .RNG.name="base::Mersenne-Twister", .RNG.seed=2000)
start4 <- list("ba"=rnorm(1,-1,4), "bb"=rnorm(1,-1,4), "bc"=rnorm(1,1,4),
               "bd"=rnorm(1,1,4), "be"=rnorm(1,1,4),
               .RNG.name="base::Super-Duper", .RNG.seed=67)
start <- list(start1, start2, start3, start4)

logreg1.runjags <- run.jags(model=logreg1,
                            method="parallel",
                            monitor=c("ba", "bb", "bc", "bd", "be", "tau"),
                            data=dataList,
                            sample=10000,
                            n.chains=4,
                            adapt=500,
                            inits=start)
add.summary(logreg1.runjags)
summary(rasch2)    # compare with the glmer adaptive-quadrature estimates
plot(logreg1.runjags)
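# Optional follow-up (a sketch, not in the original script): convert the
# runjags results to a coda mcmc.list and run standard convergence
# diagnostics. The object name "post" is illustrative.
post <- as.mcmc.list(logreg1.runjags)    # one mcmc object per chain
gelman.diag(post)                        # Gelman-Rubin diagnostic; values near 1 suggest convergence
effectiveSize(post)                      # effective number of independent draws per parameter
HPDinterval(as.mcmc(as.matrix(post)))    # 95% HPD intervals with the four chains pooled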