# Edps/Psych/Stat 587 # Spring 2019 # Carolyn J. Anderson # # This looks useful: # https://rstudio-pubs-static.s3.amazonaws.com/78961_fe5b5c6a77f446eca899afbb32bd1dc7.html # Version 1 of R code to go with notes on Models for Clustered Data # # Fitting random intercept model using Nels10 data # # Install and load lme4 and lattice library(lme4) library(lattice) # Change directory to where the data live and then setwd("D:\\Dropbox\\edps587\\lectures\\2 anovas") nels10 <- read.table(file="NELS10.txt", header=TRUE) names(nels10) head(nels10) tail(nels10) # So can just use variable name without also using data table name attach(nels10) # simple descriptive statistics summary(math) mean(math) var(math) # # Summary statistics by school # mn <- aggregate(math~sch_id, data=nels10, mean) sd <- aggregate(math~sch_id, data=nels10, sd) med <- aggregate(math~sch_id, data=nels10, median) simple <- cbind(mn, sd[,2], med[,2]) round(simple,digits=2) # # plot mean +/- 1 standard deviation by group # # -- a numbers 1 to 10 as index variable simple$index <- seq(1:10) plot(simple$index, simple$math, pch=19, xlab="School", ylab="Math", xlim=c(min(simple$index),max(simple$index)), ylim=c(30,70), main="School Mean Math Score +/- one Standard Deviation" ) lines(rbind(1:10,1:10,NA),rbind(simple$math-simple[,3],simple$math+simple[,3],NA)) # Plain old linear regression (no predictors) linreg <- lm(math ~ 1, data=nels10) linreg # basic results anova(linreg) # to get ANOVA table summary(linreg) # summary of stuff plot(linreg, las=1) # plots fitted by resdiuals # # ANOVA using schools (fixed effects)...this is ***REML*** # id <- as.factor(sch_id) fixedANOVA <- lm(math ~ id, data=nels10,REML=TRUE) summary(fixedANOVA) anova(fixedANOVA) # # ANOVA using schools (fixed effects)...this is ***MLE*** # --- are REML and MLE estimates the same or different? # REML=FALSE doesn't seem to work (gives the same as # REML=TRUE # id <- as.factor(sch_id) fixedANOVA2 <- lm(math ~ id, data=nels10, REML=FALSE) summary(fixedANOVA2) anova(fixedANOVA2) # # This fits random intercept model using lmer using REML, which is default # (random effects ANOVA) ranintREML <- lmer(math ~ 1 + (1 | sch_id), data=nels10, REML=TRUE) summary(ranintREML) # # This fits random intercept model using lmer using MLE # (random effects ANOVA using MLE) ranintMLE <- lmer(math ~ 1 + (1 | sch_id), data=nels10, REML=FALSE) summary(ranintMLE) (icc <- (30.54)/(30.54 + 72.24)) # or icc without cutting and pasting (less rounding error) # -- save the summary into s as a data frame s <- summary(ranintMLE) # -- see what available...we want what's in "varcor" as a data frame names(s) (s <- as.data.frame(s$varcor)) (icc2 <- s$vcov[1] / (s$vcov[1] + s$vcov[2])) # # Simple linear (normal) regression # multreg <- lm(math ~ 1 + homework, data=nels10) summary(multreg) anova(multreg) fitmath <- fitted(multreg) plot(homework, math, type='p', main='Linear Regression of Math on Time Spent Doing Homework', xlab='Time Spent Doing Homework', ylab='Fitted Math Scores from Linear Regression') lines(homework,fitmath,col="blue") # Lattice plot with linear regressions xyplot(math ~ homework | id, col.line='black', type=c('p','r'), cex.xlab=1.5, cex.ylab=1.5, xlab="Time Spent Doing Homework", ylab="Math Scores", cex.main=1.2, main='Variability in Math ~ Homework relationship') # # This basically fits a separate regression line for each school (id) # mathbysch <- lm(formula = math ~ 1*id + homework * id, data = nels10) summary(mathbysch) fschmath <- fitted(mathbysch) # Try 2 works much better (it's more work but figure is correct) school.list <- unique(id) length(school.list) coef <- coefficients(mathbysch) intercepts <- c(0, coef[3:11]) slopes <- c(0, coef[12:20]) plot(data = nels10, math~homework,type = 'n', ylim = c(min(math),max(math)), xlim = c(min(homework),max(homework)), cex.main = 1.25, xlab = 'Time Spent Doing Homework', ylab = "Math Scores", main = "Separate Regression for Each School") colors <- c("red", "orange", "darkgoldenrod1","magenta", "green", "blue", "purple", "white", "grey", "black") for(i in 1:length(school.list)){ abline(a = (coef[1] + intercepts[i]), b = (coef[2]+slopes[i]), col = colors[i]) par<- par(new=F) } # # Some graphs/plots # # Random intercept and Slope HLM # ---- this is analogous to fitting regression to each school but the school effects are # random. The black line is the average school and the blue ones are the conditional # ones (i.e., they include school specific estimates of intercept & slope) # # model2 <- lmer(math ~ homework + ( homework | id), data=nels10, REML=FALSE) summary(model2) school.list <- unique(id) length(school.list) fixed.effects <- fixef(model2) rand.effects <- ranef(model2) params.student <- cbind((rand.effects$id[1]+fix[1]),(rand.effects$id[2]+fix[2])) plot(data = nels, math~homework,type = 'n', ylim = c(min(math),max(math)), xlim = c(min(homework),max(homework)), cex.main = .75, xlab = 'Time Spent Doing Homework', ylab = "Math Scores", main = "Estimated Conditional Models from Random Intercepts & Slope Model") abline(a=fixed.effects[1], b= fixed.effects[2], col='black', lwd=2) for(i in 1:length(school.list)){ abline(a = params.student[i,1], b = params.student[i,2],col = 'blue') par<- par(new=F) }