# # Edps/Psych/Stat 587 # Spring 2020 # Carolyn J Anderson # # All analyses in lecture on Random Intercept & Slope Models in R (High School & Beyond Ones) # # # I relied heavliy on Introduction to Multilevel Modeling in R by Grover, Guillermono and Hudson (2015) # downloaded from https://rstudio-pubs-static.s3.amazonaws.com/78961_fe5b5c6a77f446eca899afbb32bd1dc7.html # # For p-value and different degrees of freedom using lmerTest package see # http://mindingthebrain.blogspot.com/2014/02/three-ways-to-get-parameter-specific-p.html # Below I use lmerTest, which give you Satterthwaited df and p-values for HLM (these are same # as given by SAS) # # # Load Packages: # lme4 ---- fits hlm models (and multilevel logistic) # lattice--- multiple plots per page # lmrTest--- Satterthwaited df and t-tests for fixed paramters # texreg --- for nicer output # optimx --- change the optimizer that lmer uses # library(lme4) library(lmerTest) library(lattice) library(texreg) # nice lmer output there are others, e.g., stargazer and sjplot library(optimx) # Set working dircetory # #setwd('C:/Users/cja/Dropbox/edps587/lectures/4 randomslope') setwd('D:/Dropbox/edps587/lectures/4 randomslope') ############################################################################ # Data set up: merge hsb1 and hsb2 # ############################################################################ # # Read in student level data # hsb1 <- read.table(file="HSB1data.txt", header=TRUE) # Top & bottom of the file # head(hsb1) tail(hsb1) # # Read in school level data # hsb2 <- read.table(file="HSB2data.txt", header=TRUE) # Top & bottom of the file # head(hsb2) tail(hsb2) # # # Make long matrix # # # # Already Sorted by school id # # # # Merging student & school level files by school id ('id') # # # hsb <- merge(hsb1,hsb2, by=c('id')) head(hsb) ######################################################################### # Some information about the data (e.g., descriptive statistics # ######################################################################### # # Number of schools (nschools <- nrow(hsb2)) # # Overall summary statistics # summary(hsb$mathach) sd(hsb$mathach) summary(hsb$ses) sd(hsb$ses) # # Summary descriptive statistics by School # mmean <- aggregate(mathach~id, data=hsb, "mean") msd <- aggregate(mathach~id, data=hsb, sd) mmin <- aggregate(mathach~id, data=hsb, min) mmax <- aggregate(mathach~id, data=hsb, max) math.stat <- cbind(mmean,msd[,2], mmin[,2], mmax[,2]) names(math.stat) <- c('Mean Math', 'SD math', 'Min', 'Max') math.stat #################################################### # Some graphing # #################################################### # # Plot of mean ses by id --> variability between schools # plot(hsb$id, hsb$meanses, type='p', pch=21, main='HSB: Group Mean SES by School ID', xlab='School ID', ylab='Group Mean SES') # # Plot of student ses by id --> variability within schools # plot(hsb$id, hsb$ses, type='p', pch=21, main='HSB: Student SES by School ID', xlab='School ID', ylab='Group Mean SES') # # Just one school # school.1224 <- hsb[ which(hsb$id==1224), ] plot(school.1224$ses, school.1224$mathach, type='p', pch=21, main="HSB: Math by SES for School 1224", xlab='Student SES', ylab='Math Achievement') reg.1224 <- lm(school.1224$mathach~school.1224$ses, data=school.1224) abline(reg.1224) # # Spanel plot of a random sample of 20 # # Will need this for graph hsb$cses <- hsb$ses - hsb$meanses # Random sample groups <- unique(hsb$id)[sample(1:160,20)] subset <- hsb[hsb$id%in%groups,] xyplot(mathach ~ ses | id, data=subset, col.line='black', type=c('p'), main='Varability in Math ~ SES relationship') # # panel plot of a random sample of 20 with regression lines # xyplot(mathach ~ ses | id, data=subset, col.line='black', type=c('p','r'), main='Varability in Math ~ SES relationship') # # Panel Now with school centered ses # xyplot(mathach ~ cses | id, data=subset, col.line='black', type=c('p','r'), main='Math Achievement by School Centered SES', xlab='School Centered SES', ylab='Math Achievement Test Scores') # # All regression lines in one plot # # # page 3 Plot all regressions in one plot # # need school ID equal to 1 - 160 sch_id <- seq(1:nschools) id <- unique(hsb$id) sch_info <- as.data.frame(cbind(id,sch_id)) hsb <- merge(hsb,sch_info,by=c("id")) # First make frame for plot plot(hsb$ses,hsb$mathach,type = 'n', ylim = c(-10, 30), xlim = c(-3, 3), cex.main = 1.5, xlab = 'SES', ylab = "Math Scores", main = "Separate Regression for Each School " ) # now fit and add regression lines for(i in 1:nschools){ sub <- hsb[which(hsb$sch_id==i),] fitted <- fitted(lm(mathach~ses,data=sub)) lines(sub$ses,fitted,col=i) } ###################################################### # ~ page 45 Graph of estimated variances of random # # intercept and intercept & slope # ###################################################### x <- seq(from=-2, to=2, by=.1) total<- 8 + 2*(-0.5)*x + (1.5)*(x**2) + 30 plot(x,total,type = 'l', cex.main = 1.5, col='blue', xlim=c(-2,2), xlab = 'Value of x', ylab = "Estimated Variance of x", main = "Variance Function: Illustration of Heteroscedasticity" ) ########################################################### # Create and add mean meanminority & meanfemale to data # ########################################################### # The data file already has mean SES but need mean center ses hsb$ses.centered <- hsb$ses - hsb$meanses meanminority <- aggregate(minority ~ id , data=hsb, mean) names(meanminority) <- c('id', 'meanminority') hsb <- merge(hsb,meanminority, by =c('id')) meanfemale <- aggregate(female ~ id , data=hsb, mean) names(meanfemale) <- c('id', 'meanfemale') hsb <- merge(hsb,meanfemale, by =c('id')) # If you want to save data so that you don't got through steps # to create is again, you can write the results to a file. # There are other file formats that you can use (e.g., csv) # #write.table(hsb, 'hsb.txt', row.names=F, na='.') ###################################################################### # Model fitting # # Note that "Std.Dev." in the Random effects table produced by lmer # is NOT the standard error of the variance estmates. The latter # is not computed in lmer but it is computed in SAS. These is some # controversy over whether se of variance estimates should even be # computed. We do not use them, so not problem. ###################################################################### # # Random inercept / empty HLM / Null model / RANDOM effects ANOVA # # summary(model0 <- lmer(mathach ~ 1 + (1 | id), data=hsb, REML=FALSE)) screenreg(list(model0)) # Cut and paste icc <- 8.553/(8.553+39.148) icc # Or extract the information to compute vars <- as.data.frame(VarCorr(model0))[4] total <- sum(vars) tau00 <- vars[1,1] icc <- tau00/total icc # Use the following function that I wrote: icc.lmer <- function(modl){ vars <- as.data.frame(VarCorr(modl))[4] total <- sum(vars) tau00 <- vars[1,1] icc <- tau00/total return(icc) } # example of how to use this icc.lmer(model0) # # Random intercept and slope for centered ses # summary(model1 <- lmer(mathach ~ 1 + ses.centered + (1 | id), data=hsb, REML=FALSE)) icc.lmer(model1) # for easier comparison screenreg(list(model0,model1),single.row=TRUE,custom.model.names=c("Null","RI cses")) # for latex table texreg(list(model1),single.row=TRUE,custom.model.names=c("RI cses")) # # Random intercept and slope & meanses for ses # summary(model2 <- lmer(mathach ~ 1 + ses.centered + (1 + ses.centered| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model2))[4] deviance(model2) # for easier comparison screenreg(list(model0,model1,model2),single.row=TRUE,custom.model.names=c("Null","RI", "RI + S")) # and for me, latex code texreg(list(model0,model1,model2),single.row=TRUE,custom.model.names=c("Null","RI","RI and S")) ############################ # ~ page 20 plot # ############################ par(mfrow=c(1,1)) fixed <- fixef(model2) mu2 <- fixed[1] + fixed[2]*hsb$ses.centered plot(hsb$ses.centered, mu2, type='l', col='black', xlim=c(-2, 2), ylim=c(0, 25), xlab='School Mean Centered SES', ylab='Fitted Math Scores', main='HSB: Population Average Regressions (Model 2)' ) ################################################################ # ~ page 21, Sample of fitted school specific regression lines # ################################################################ random <- ranef(model2, level=0:2) U0j <- random$id[,1] U1j <- random$id[,2] plot(hsb$mathach ~ hsb$ses.centered, type = 'n', cex.main = 1.5, xlab = 'School Mean Centered SES', ylab = "Fitted Math Scores", main = "HSB: Fitted School Regression Lines (private,white)" ) # draw in scho0l specific lines: Private, white for (i in 1:40){ abline(a=(fixed[1]+ U0j[i]), b=(fixed[2]+U1j[i]), col="blue") } ############################################## # ~ p 22 Side by Side Comparison # ############################################## par(mfrow=c(1,2)) plot(hsb$ses.centered, mu2, type='l', col='black', xlim=c(-2, 2), ylim=c(0, 25), xlab='School Mean Centered SES', ylab='Fitted Math Scores', main='HSB: Population Average Regressions (Model 2)' ) plot(hsb$mathach ~ hsb$ses.centered, type = 'n', cex.main = 1.5, xlab = 'School Mean Centered SES', ylab = "Fitted Math Scores", main = "HSB: Fitted School Regression Lines" ) # draw in scho0l specific lines: Private, white for (i in 1:40){ abline(a=(fixed[1]+ U0j[i]), b=(fixed[2]+U1j[i]), col='blue') } ######################################################## # ~ Page 23, estimated variance functions for 2 models# ######################################################## par(mfrow=c(1,1)) ses2 <- seq(from=-2, to= 3, by=.1) vars.ri <- as.data.frame(VarCorr(model1))[4] total.ri <- rep(sum(vars.ri), 61) vars.ris <- as.data.frame(VarCorr(model2))[4] total.ris<- vars.ris[1,] + 2*vars.ris[3,]*ses2 + vars.ris[2,]*(ses2**2) + vars.ris[4,] plot(ses2,total.ris,type = 'l', cex.main = 1.5, col='blue', xlim=c(-2,3), ylim=c(45,52), xlab = 'School Mean Centered SES', ylab = "Estimated Variance of Math Scores", main = "HSB: Variance Function" ) # random inercept abline(a=sum(vars.ri), b=0, col='red') legend(-2,52, c('Random Intercept & Slope','Random Intercept'), lty=c(1,1), lwd=c(2.5,2.5), col=c('blue','red')) ################################################################# # Discrete Level 1 predictors # ################################################################# # Note female and minority are dummy coded model.d1 <- lmer(mathach ~ 1 + ses.centered + female + minority+ (1 + ses.centered| id), data=hsb, REML=FALSE) summary(model.d1) as.data.frame(VarCorr(model.d1))[4] n2lnlike <- -2* logLik(model.d1) n2lnlike screenreg(list(model1,model2,model.d1),custom.model.names=c("RI","RI and RS","Discrete fixed")) # Plot of population average model fixed <- fixef(model.d1) fem_mi <- fixed[1] + fixed[2]*hsb$ses.centered + fixed[3] + fixed[4] plot(hsb$ses.centered, fem_mi, type='l', col='black', ylim = c(0,25), xlim = c(-2,2), xlab='School Centered SES', ylab='Fitted Math Scores', main='Population Average Regressions (discrete fixed)' ) abline(a=(fixed[1] + fixed[3]), b=fixed[2], col='red') abline(a=(fixed[1] + fixed[4]), b=fixed[2], col='blue') abline(a=(fixed[1]), b=fixed[2], col='green') legend(-2,25, c('Female, Minority', 'Female, non-Minority', 'Male, Minority', 'Male, non-Minority'), lty=c(1,1,1,1), col=c('black', 'red', 'blue', 'green') ) # # Plot of variance function # ses2 <- seq(from=-2, to= 3, by=.1) vars.d1 <- as.data.frame(VarCorr(model.d1))[4] total.d1<- vars.d1[1,] + 2*vars.d1[3,]*ses2 + vars.d1[2,]*(ses2**2) + vars.d1[4,] plot(ses2,total.d1,type = 'l', cex.main = 1.5, col='blue', xlim=c(-2,3), xlab = 'School Mean Centered SES', ylab = "Estimated Variance of Math Scores", main = "HSB: Variance Function (discrete fixed)" ) ##################################################### # ~ page 66, models fit to data # # Showing effect of random categorical predictors # ##################################################### hsb$gender <- as.factor(hsb$female) hsb$race <- as.factor(hsb$minority) hsb$boy <- as.factor(-1*(hsb$female-1)) hsb$male <- -1*(hsb$female-1) summary(model.00 <- lmer(mathach ~ 1 + ses.centered + gender + race + (1 + ses.centered| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model.00))[4] # Model b1 summary(model.b1 <- lmer(mathach ~ 1 + ses.centered + female + race + (1 + ses.centered + female| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model.b1))[4] # Alternative Model b1 summary(model.ab1 <- lmer(mathach ~ 1 + ses.centered + gender + race + (1 + ses.centered + gender| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model.ab1))[4] # Model b2 summary(model.b2 <- lmer(mathach ~ 1 + ses.centered + hsb$male + race + (1 + ses.centered + hsb$male| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model.b2))[4] # Model c. summary(model.c <- lmer(mathach ~ 1 + ses.centered + hsb$male + race + (0 + ses.centered + male + female + minority| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model.b2))[4] # Model: random slopes for all--- THIS FAILS as it SHOULD summary(model.all <- lmer(mathach ~ 1 + ses.centered + female + minority + (1 + ses.centered + female + minority | id), data=hsb, REML=FALSE)) # Model c + minority --- FAILS IN R BUT NOT SAS summary(model.c <- lmer(mathach ~ 1 + ses.centered + female + minority + (0 + ses.centered + female + race| id), data=hsb, REML=FALSE)) ###################################################################################### # You're on your own for the figures --- cut, paste & edit code for similar figures # ###################################################################################### ######################################################### # p118ish Cross level interactions # ######################################################### summary(model.3 <- lmer(mathach ~ 1 + ses.centered + meanses + ses.centered*meanses + (1 + ses.centered| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model.3))[4] # drop non-significant interactions summary(model.4 <- lmer(mathach ~ 1 + ses.centered + meanses + (1 + ses.centered| id), data=hsb, REML=FALSE)) # # p127 Big model --- R starts to get slow, can switch to micro-soft (free) R which is about 4 times faster. # # # this fails summary(model.5 <- lmer(mathach ~ 1 + ses.centered + female + minority + meanses + size + sector + pracad + disclim + himinty +(1 + ses.centered + female + minority| id), data=hsb, REML=FALSE)) # # However rescale school size which has a very different scale # hsb$xsize <- scale(hsb$size,center=FALSE,scale=TRUE) summary(model.5 <- lmer(mathach ~ 1 + ses.centered + female + minority + meanses + xsize + sector + pracad + disclim + himinty +(1 + ses.centered + female + minority| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model.5))[4] # # Other "tricks" to get models to converge is topic for another lecture, # but I did need an additional one to get model 6 to converge. # # p127 Bigger model --- add sector as cross-level (i.e. predictor at level 2) & drop random gender. # Do see some differences in estimates of tau relative to SAS. # # Needed to center himinity and change optimzier to get convergence # hsb$xhiminty <- scale(hsb$himinty,center=FALSE,scale=TRUE) summary(model.6 <- lmer(mathach ~ 1 + ses.centered + female + minority + meanses + xsize + sector + pracad + disclim + xhiminty + ses.centered*sector + minority*sector + (1 + ses.centered + minority| id), data=hsb, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead") )) as.data.frame(VarCorr(model.6))[4] # # p127 A bit simpler model --- only random intercept # summary(model.7 <- lmer(mathach ~ 1 + ses.centered + female + minority + meanses + xsize + sector + pracad + disclim + ses.centered*sector + minority*sector + (1| id), data=hsb, REML=FALSE)) as.data.frame(VarCorr(model.7))[4]