# This looks useful: # https://rstudio-pubs-static.s3.amazonaws.com/78961_fe5b5c6a77f446eca899afbb32bd1dc7.html # Edps/Psych/Stat 587 # Fall 2016 # Carolyn J. Anderson # # Version 1 of R code to go with notes on Models for Clustered Data # # Fitting random intercept model using Nels10 data # # Install and load lme4 and lattice library(lme4) library(lattice) # Change directory to where the data live and then nels10 <- read.table(file="NELS10.txt", header=TRUE) head(nels10) tail(nels10) # So can just use variable name without also using data table name attach(nels10) # simple descriptive statistics summary(math) mean(math) var(math) # Plain old linear regression (no predictors) linreg <- lm(math ~ 1, data=nels10) linreg # basic results anova(linreg) # to get ANOVA table summary(linreg) # summary of stuff plot(linreg, las=1) # plots fitted by resdiuals # # ANOVA using schools (fixed effects)...this is REML # id <- as.factor(sch_id) fixedANOVA <- lm(math ~ id, data=nels10) summary(fixedANOVA) anova(fixedANOVA) # # ANOVA using schools (fixed effects)...this is MLE (same or different?) # id <- as.factor(sch_id) fixedANOVA2 <- lm(math ~ id, data=nels10, REML=FALSE) summary(fixedANOVA2) anova(fixedANOVA2) # # This fits random intercept model using lmer using REML, which is default # (random effects ANOVA) ranintREML <- lmer(math ~ 1 + (1 | sch_id), data=nels10, REML=TRUE) summary(ranintREML) # # This fits random intercept model using lmer using MLE # (random effects ANOVA using MLE) ranintMLE <- lmer(math ~ 1 + (1 | sch_id), data=nels10, REML=FALSE) summary(ranintMLE) icc <- (30.54)/(30.54 + 72.24) icc # # Simple linear (normal) regression # multreg <- lm(math ~ 1 + homework, data=nels10) summary(multreg) anova(multreg) fitmath <- fitted(multreg) plot(homework, fitmath, type='l', main='Linear Regression of Math on Time Spent Doing Homework', xlab='Time Spent Doing Homework', ylab='Fitted Math Scores from Linear Regression') # # This basically fits a separate regression line for each school (id) # mathbysch <- lm(formula = math ~ 1*id + homework * id, data = nels10) summary(mathbysch) fschmath <- fitted(mathbysch) # Try 1, but don't want lines from different schools connected plot(homework, fschmath, type='l', main='Try1: Separate Linear Regression for Each School', xlab='Time Spent Doing Homework', ylab='Fitted Math Scores' ) # Try 2 works much better (it's more work but figure is correct) school.list <- unique(id) length(school.list) coef <- coefficients(mathbysch) intercepts <- c(0, coef[3:11]) slopes <- c(0, coef[12:20]) plot(data = nels10, math~homework,type = 'n', ylim = c(min(math),max(math)), xlim = c(min(homework),max(homework)), cex.main = .75, xlab = 'Time Spent Doing Homework', ylab = "Math Scores", main = "Separate Regression for Each School") for(i in 1:length(school.list)){ abline(a = (coef[1] + intercepts[i]), b = (coef[2]+slopes[i]), col = 'blue') par<- par(new=F) } # # Some graphs/plots # # Random intercept and Slope HLM # ---- this is analogous to fitting regression to each school but the school effects are # random. The black line is the average school and the blue ones are the conditional # ones (i.e., they include school specific estimates of intercept & slope) # # model2 <- lmer(math ~ homework + ( homework | id), data=nels10, REML=FALSE) summary(model2) school.list <- unique(id) length(school.list) fixed.effects <- fixef(model2) rand.effects <- ranef(model2) params.student <- cbind((rand.effects$id[1]+fix[1]),(rand.effects$id[2]+fix[2])) plot(data = nels, math~homework,type = 'n', ylim = c(min(math),max(math)), xlim = c(min(homework),max(homework)), cex.main = .75, xlab = 'Time Spent Doing Homework', ylab = "Math Scores", main = "Estimated Conditional Models from Random Intercepts & Slope Model") abline(a=fixed.effects[1], b= fixed.effects[2], col='black', lwd=2) for(i in 1:length(school.list)){ abline(a = params.student[i,1], b = params.student[i,2],col = 'blue') par<- par(new=F) }