# Edps/Psych/Stat 587 # Spring 2019 # Carolyn J Anderson # # All analyses in lecture on Random Intercept Models in R (High School & Beyond Ones) # # # I relied heavliy on Introduction to Multilevel Modeling in R by Grover, Guillermono and Hudson (2015) # downloaded from https://rstudio-pubs-static.s3.amazonaws.com/78961_fe5b5c6a77f446eca899afbb32bd1dc7.html # # For p-value and different degrees of freedom using lmerTest package see # http://mindingthebrain.blogspot.com/2014/02/three-ways-to-get-parameter-specific-p.html # Below I use lmerTest, which give you Satterthwaited df and p-values for HLM (these are same # as given by SAS) # # # Load Packages: # lme4 # lattice # lmrTest # texreg --- for nicer output # # Set working directory directory # setwd('D:\\Dropbox\\edps587\\lectures\\3 randomintercept') ############################################################################ # Data set up: merge hsb1 and hsb2 # ############################################################################ # # Read in student level data # hsb1 <- read.table(file="HSB1data.txt", header=TRUE) # Top & bottom of the file # head(hsb1) tail(hsb1) # # Read in school level data # hsb2 <- read.table(file="HSB2data.txt", header=TRUE) # Top & bottom of the file # head(hsb2) tail(hsb2) # # # Make long matrix # # # # Already Sorted by school id # # # # Merging student & school level files by school id ('id') # # # hsb <- merge(hsb1,hsb2, by=c('id')) head(hsb) # # Attach to make variables easier to name # attach(hsb) ######################################################################### # Some information about the data (e.g., descriptive statistics # ######################################################################### # # Number of schools nschools <- nrow(hsb2) nschools # # List of schools school.list <- hsb2$id # # Overall summary statistics # summary(mathach) sd(mathach) mean(mathach) summary(ses) sd(ses) # # Summary descriptive statistics by School # mmean <- aggregate(mathach~id, data=hsb, FUN="mean") msd <- aggregate(mathach~id, data=hsb, sd) mmin <- aggregate(mathach~id, data=hsb, min) mmax <- aggregate(mathach~id, data=hsb, max) math.stat <- cbind(mmean,msd[,2], mmin[,2], mmax[,2]) names(math.stat) <- c('Mean Math', 'SD math', 'Min', 'Max') math.stat #################################################### # Some graphing # #################################################### # # Plot of mean ses by id --> variability between schools # plot(id, meanses, type='p', pch=21, main='HSB: Group Mean SES by School ID', xlab='School ID', ylab='Group Mean SES') # # Plot of student ses by id --> variability within schools # plot(id, ses, type='p', pch=21, main='HSB: Student SES by School ID', xlab='School ID', ylab='Group Mean SES') # # Just one school # school.1224 <- hsb[ which(id==1224), ] plot(school.1224$ses, school.1224$mathach, type='p', pch=21, main="HSB: Math by SES for School 1224", xlab='Student SES', ylab='Math Achievement') reg.1224 <- lm(school.1224$mathach~school.1224$ses, data=school.1224) abline(reg.1224) # # Spanel plot of a random sample of 20 # # Will need this for graph cses <- ses - meanses hsb <- cbind(hsb,cses) groups <- unique(hsb$id)[sample(1:160,20)] subset <- hsb[hsb$id%in%groups,] xyplot(mathach ~ ses | id, data=subset, col.line='black', type=c('p'), main='Varability in Math ~ SES relationship') # # panel plot of a random sample of 20 with regression lines # xyplot(mathach ~ ses | id, data=subset, col.line='black', type=c('p','r'), main='Varability in Math ~ SES relationship') # # Panel Now with school centered ses # xyplot(mathach ~ cses | id, data=subset, col.line='black', type=c('p','r'), main='Math Achievement by School Centered SES', xlab='School Centered SES', ylab='Math Achievement Test Scores') # # All regression lines in one plot # schid <- as.factor(id) mathbysch <- lm(formula = mathach ~ 1*schid + ses*schid, data = hsb) summary(mathbysch) fschmath <- fitted(mathbysch) school.list <- unique(schid) length(school.list) coef <- coefficients(mathbysch) intercepts <- c(0, coef[3:161]) slopes <- c(0, coef[162:320]) # Plot all regressions in one plot plot(data = hsb, mathach~ses,type = 'n', ylim = c(-10, 30), xlim = c(-3, 3), cex.main = 1.5, xlab = 'SES', ylab = "Math Scores", main = "Separate Regression for Each School " ) for(i in 1:length(school.list)){ abline(a = (coef[1] + intercepts[i]), b = (coef[2]+slopes[i]), col = 'blue') par<- par(new=F) } abline(v=c(0), type='l', lwd=2, color='black') ########################################################### # Create and add mean meanminority & meanfemale to data # ########################################################### # The data file already has mean SES but need mean center ses meanminority <- aggregate(minority ~ id , data=hsb, FUN="mean") names(meanminority) <- c('id', 'meanminority') hsb <- merge(hsb,meanminority, by =c('id')) meanfemale <- aggregate(female ~ id , data=hsb, FUN="mean") names(meanfemale) <- c('id', 'meanfemale') hsb <- merge(hsb,meanfemale, by =c('id')) # If you want to save data so that you don't got through steps # to create is again, you can write the results to a file. # There are other file formats that you can use (e.g., csv) # write.table(hsb, 'hsb.txt', row.names=F, na='.') ###################################################################### # Model fitting # # Note that "Std.Dev." in the Random effects table produced by lmer # is NOT the standard error of the variance estmates. The latter # is not computed in lmer but it is computed in SAS. These is some # controversy over whether se of variance estimates should even be # computed. We do not use them, so not problem. ###################################################################### # # Attach file that has all variables # attach(hsb) # # Random inercept / empty HLM / Null model / RANDOM effects ANOVA # # model0 <- lmer(mathach ~ 1 + (1 | id), data=hsb, REML=FALSE) summary(model0) # Cut and paste icc <- 8.553/(8.553+39.148) icc # Or extract the information to compute vars <- as.data.frame(VarCorr(model0))[4] total <- sum(vars) tau00 <- vars[1,1] icc <- tau00/total icc # Use the following function that I wrote: icc.lmer <- function(modl){ vars <- as.data.frame(VarCorr(modl))[4] total <- sum(vars) tau00 <- vars[1,1] icc <- tau00/total return(icc) } # example of how to use this icc.lmer(model0) # # Random intercept and fixed slope for ses # # # To get fixed effects df (Satterhwaite) p-value # require(lmerTest) model1 <- lmer(mathach ~ 1 + ses + (1 | id), data=hsb, REML=FALSE) summary(model1) icc.lmer(model1) #################### ## Nifty Package # #################### library(texreg) # Nice display screenreg(list(model0,model1)) mytable <- texreg(list(model0, model2), label = "tab:4", caption = "Bolded coefficients, custom notes, three digits.", float.pos = "h", bold = 0.05, stars = 0, custom.note = "Coefficients with $p < 0.05$ in \\textbf{bold}.", digits = 3, leading.zero = FALSE, omit.coef = "Inter") # # Random intercept and fixed slope & meanses for ses # model2 <- lmer(mathach ~ 1 + ses + meanses + (1 | id), data=hsb, REML=FALSE) summary(model2) # # Need to center ses (if haven't already done above for graph) # cses <- ses - meanses model3 <- lmer(mathach ~ 1 + cses + (1 | id), data=hsb, REML=FALSE) summary(model3) icc.lmer(model3) # Plot population average regression intercept <- fixef(model3)[1] slope <- fixef(model3)[2] intercept.j <- ranef(model3) student.int <- intercept.j$id[1] plot(mathach~cses,type = 'n', data = hsb, ylim = c(-10, 30), xlim = c(-3, 3), cex.main = 1.5, xlab = 'School Centered SES', ylab = "Fitted Math Achievement Scores", main = "Population Average Regression " ) # Population average line for(i in min(cses):max(cses)){ abline(a=intercept, b=slope, color='black',lwd=4) } # Population and School Specific plot(mathach~cses,type = 'n', data = hsb, ylim = c(-10, 30), xlim = c(-3, 3), cex.main = 1.5, xlab = 'School Centered SES', ylab = "Fitted Math Achievement Scores", main = "Population & School Specific Regressions " ) # draw in scho0l specific line for (i in 1:length(school.list)){ abline(a=(intercept+student.int[i,]), b=slope, type='l', col='lightpink') } # Population average line for(i in min(cses):max(cses)){ abline(a=intercept, b=slope, color='black',lwd=4) } # # Need to center ses & add back in mean ses # model4 <- lmer(mathach ~ 1 + cses + meanses + (1 | id), data=hsb, REML=FALSE) summary(model4) icc.lmer(model4) # # lots of level 1 variables # model5 <- lmer(mathach ~ 1 + cses + female + minority + meanses + (1 | id), data=hsb, REML=FALSE) summary(model5) icc.lmer(model5) # # lots of level 1 & 2 variables # model6 <- lmer(mathach ~ 1 + cses + female + minority + meanses + himinty + pracad + disclim + sector + size +(1 | id), data=hsb, REML=FALSE) summary(model6) screenreg(list(model0,model5,model6,model7)) # # even more 2 variables # model7 <- lmer(mathach ~ 1 + cses + female + minority + meanses + meanminority + meanfemale + himinty + pracad + disclim + sector + size +(1 | id), data=hsb, REML=FALSE) summary(model7) # Quick comparison screenreg(list(model0,model5,model6,model7))