# Edps/Psyc/Stat 587 # Spring 2019 # C.J.Anderson # # Model building # NELS 23 data --- most of the graphs in the lecture graphing fixed effects # # Note regarding Packages: # stringi (this needs to be installed before HLMdiag or HLMdiag won't load properly) # HLMdiag # # # load packages # library(lme4) library(lmerTest) library(lattice) library(ggplot2) library(stringi) library(HLMdiag) library(texreg) library(optimx) source("D:\\Dropbox\\edps587\\lectures\\6 inference1\\All_functions.txt") ############################################################################# # read in data into R # the first row of the data matrix is the variable names so there is a header # change to your path ############################################################################# nels<-read.table("D:\\Dropbox\\edps587\\lectures\\8 modelbuilding\\nels23_data.txt",header=TRUE) #look at first few rows of data set head(nels) # # Set up # nels$public <- ifelse (nels$schtype==1, 1, 0) nels$white <- ifelse (nels$race==4, 1, 0) schMses <- as.data.frame(aggregate(ses~school, data=nels, FUN="mean")) names(schMses) <- c('school', 'schMses') nels <- merge(nels,schMses, by=c('school')) nels$schCses <- nels$ses - nels$schMses nels$homewrk <- as.numeric(nels$homew) nels$school <- as.factor(nels$school) # I think I am done with data set up (at least for now) ############################################################### # Ask for next graph --- "enter" to proceed ############################################################### par(ask=TRUE) ######################### homework ############################# # # lm and overall 95% CI # math.homewrk <- lm(math~homewrk, data=nels) conflm1<-confint(math.homewrk) plot(nels$homewrk,nels$math,type='p', main='Overall regression with 95% CI') abline(math.homewrk) abline(coef=conflm1[,1],lty=2) abline(coef=conflm1[,2],lty=2) # # Smooth (loess) curve fit to data # # --- fit loess to data lw1 <- loess(math ~ homewrk,data=nels) # --- plot data plot(nels$homewrk,nels$math, main="NELS:Math Score Plotted vs Homework \n with smooth (loess) curve", xlab="Time Spent Doing homework", ylab="Math Scores", cex=1.0, col="black" ) # --- put in proper order j <- order(nels$homewrk) # --- Add loess to figure lines(nels$homewrk[j],lw1$fitted[j],col="blue",lwd=3) # # Joint points for each school # ################################ # # ~ page 25 compute mean and plot by school just connecting points # math.hmwk <- aggregate(math ~ homewrk + school, data=nels, FUN="mean") ggplot(math.hmwk, aes(x=homewrk, y=math, col=school,type='l'), main='Regressions all in one plot') + geom_line() + theme(legend.position="none") # --- alternate method plot(nels$homewrk,nels$math, main="NELS:Math Score Plotted vs Homework \n joining means for each school", xlab="Time Spent Doing homework", ylab="School Mean Math Scores", cex=1.0, col="black", type="n" ) # --- put in proper order j <- order(math.hmwk$homewrk) # --- Add to figure for (i in unique(math.hmwk$school)) { sub <- math.hmwk[which(math.hmwk$school==i),] lines(sub$homewrk,sub$math,col=i) } # # page 32 linear regression math~homewrk + race # # # Plot 1: Lattice plot --- math # xyplot(math ~ homew | school, data=nels, col.line='black', type=c('p'), main='Varability in Math ~ homework relationship') # # page 19 - 20 Plot 2: Lattice plot with regressions # xyplot(math ~ homew | school, data=nels, col.line='black', type=c('p','r'), main='Varability in Math ~ homework relationship') # # page 23-24 Plot 3: Lattice plot with smooth regressions (i.e., loess) # xyplot(math ~ homew | school, data=nels, col.line='black', type=c('p','smooth'), main='Varability in Math ~ homework relationship') # # compute means and plot # mathhmwk <- aggregate(math ~ homewrk, data=nels, FUN="mean") plot(mathhmwk[,1],mathhmwk[,2], type='b', xlab="Time Spent Doing Homework", ylab="Mean Math Score", main="NELS: Overal Means for Each Level Homework") # # page 25 compute mean and plot by school just connecting points # math.hmwk <- aggregate(math ~ homewrk + school, data=nels, FUN="mean") ggplot(math.hmwk, aes(x=homewrk, y=math, col=school,type='l'), main='Regressions all in one plot') + geom_line() + theme(legend.position="none") # # # page 26 All regressions in one figure # ggplot(nels, aes(x=homewrk, y=math, col=school,type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) + theme(legend.position="none") # Alternate code to put all regressions in a single plot: # --- set up graphic frame: plot(nels$homew,nels$math, type="n", col="blue", lwd=2, main="NELS: Linear Regression by School \n(for where there is data)", xlab="Time Spent Doing Homework", ylab="Math Scores" ) # --- loop through the schools and add line for (j in unique(nels$school)) { sub <- nels[ which(nels$school==j),] fitted <- fitted(lm(math~homew,sub)) lines(sub$homew,fitted,col=j) } # # page 28 compute mean and plot by white # w <- as.factor(nels$white) math.white <- aggregate(math ~ homewrk + w, data=nels, FUN="mean") ggplot(math.white, aes(x=homewrk, y=math, col=w, type='l'), main='Regressions all in one plot') + geom_line() # # Alternate that looks nicer: plot(math.white$homewrk[1:7],math.white$math[1:7],type="l", main="Mean Math by Homework", xlab="Time Spent Doing Homework", ylab="Mean Math Score", col="blue") lines(math.white$homewrk[8:14],math.white$math[8:14],type="l",col="red") legend("topleft",legend=c("Non-White", "White"), col=c("blue","red"), lty=1, cex=1.2) # # page 29 linear regression math~homewrk + white # ggplot(math.white, aes(x=homewrk, y=math, col=w, type='l'), + geom_smooth(method="lm", se=FALSE ) ) # # Alternate that looks nicer--- regression on means math and homework by white # plot(math.white$homewrk,math.white$math,type="n", main="Regression of Math on Homework", xlab="Time Spent Doing Homework", ylab="Mean Math Score", ylim=c(40,70)) subw <- math.white[which(math.white$w==0),] lm1 <- lm(math ~ homewrk,data=subw) lines(subw$homewrk,fitted(lm1),col="red") subw <- math.white[which(math.white$w==1),] lm2 <- lm(math ~ homewrk,data=subw) lines(subw$homewrk,fitted(lm2),col="blue") legend("topleft",legend=c("Non-White", "White"), col=c("red","blue"), lty=1, cex=1.2) # # Alternate that looks nicer--- regression to whole uncollapsed data set # plot(math.white$homewrk,math.white$math,type="n", main="Regression of Math on Homework", xlab="Time Spent Doing Homework", ylab="Mean Math Score", ylim=c(40,70)) subw <- nels[which(nels$white==1),] abline(lm(math~homewrk,data=subw),col="blue") subn <- nels[which(nels$white==0),] abline(lm(math~homewrk,data=subn),col="red") legend("topleft",legend=c("Non-White", "White"), col=c("red","blue"), lty=1, cex=1.2) } # # page 31 compute mean and plot by race # r <- as.factor(nels$race) math.race <- aggregate(math ~ homewrk + r, data=nels, FUN="mean") ggplot(math.race, aes(x=homewrk, y=math, col=r, type='l'), main='Regressions all in one plot') + geom_line() text(0,77,'1=Asian/PI,',col='red') text(1.5,77,'2=Hispanic',col='darkolivegreen4') text(2.75,77,'3=Black',col='green') text(3.75,77,'4=White',col='blue') text(5.25,77,'5=Native Amercian',col='purple') # # Alternate that looks nicer # plot(math.race$homewrk[1:5],math.race$math[1:5],type="l", main="Mean Math by Homework", xlab="Time Spent Doing Homework", ylab="Mean Math Score", ylim=c(30,70), xlim=c(0,7), col="blue") lines(math.race$homewrk[6:11],math.race$math[6:11],type="l",col="red") lines(math.race$homewrk[12:18],math.race$math[12:18],type="l",col="darkgreen") lines(math.race$homewrk[19:26],math.race$math[19:26],type="l",col="purple") lines(math.race$homewrk[27:28],math.race$math[27:28],type="l",col="cyan") legend("topleft",legend=c("Asian/PI", "Hispanic","Black","White","Native Amercian"), col=c("blue","red","darkgreen","purple","cyan"), lty=1, cex=1.2) # # page 32 linear regression math~homewrk + race # ggplot(nels, aes(x=homewrk, y=math, col=r, type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) text(0,77,'1=Asian/PI,',col='red') text(1.5,77,'2=Hispanic',col='darkolivegreen4') text(2.75,77,'3=Black',col='darkgreen') text(3.75,77,'4=White',col='blue') text(5.25,77,'5=Native Amercian',col='purple') # # Nicer version but extends beyond range where there is data # par(mfrow=c(1,1)) plot(math.race$homewrk[1:5],math.race$math[1:5],type="n", main="Mean Math by Homework", xlab="Time Spent Doing Homework", ylab="Mean Math Score", ylim=c(30,70), xlim=c(0,7)) abline(lm(math.race$math[1:5]~math.race$homewrk[1:5]),type="l",col="blue") abline(lm(math.race$math[6:11]~math.race$homewrk[6:11]),type="l",col="red") abline(lm(math.race$math[12:18]~math.race$homewrk[12:18]),type="l",col="darkgreen") abline(lm(math.race$math[19:26]~math.race$homewrk[19:26]),type="l",col="purple") abline(lm(math.race$math[27:28]~math.race$homewrk[27:28]),type="l",col="cyan") legend("topleft",legend=c("Asian/PI", "Hispanic","Black","White","Native Amercian"), col=c("blue","red","darkgreen","purple","cyan"), lty=1, cex=1.2) # # page 34 race x homework to show where there is not much data # par(ask=FALSE) # Does not extend beyond the range of data # --- set up graphic frame: plot(nels$homew,nels$math, type="n", col="blue", lwd=2, main="NELS: Linear Regression by School \n(for where there is data)", xlab="Time Spent Doing Homework", ylab="Math Scores" ) # --- loop through the schools and add line for (j in nels$race) { sub <- nels[ which(nels$race==j),] fitted <- fitted(lm(math~homew,sub)) lines(sub$homew,fitted,col=j) } legend("topleft",legend=c("Asian/PI", "Hispanic","Black","White","Native Amercian"), col=c("blue","red","darkgreen","purple","cyan"), lty=1, cex=1.2) # # Cross-classification of homew x race # with(nels, table(homew, race)) # # page 36 mean math by gender # nels$gender <- as.factor(nels$sex) math.gender <- aggregate(math ~ homewrk + gender, data=nels, FUN="mean") ggplot(math.gender, aes(x=homewrk, y=math, col=gender, type='l'), main='Regressions all in one plot') + geom_line() text(0,77,'1=Male,',col='red') text(1.5,77,'2=Female',col='lightseagreen') # # Nicer looking where fit lines to means # plot(math.gender$homewrk,math.gender$math,type="n", main="Regression of Math on Homework \n Seperate lines for genders", xlab="Time Spent Doing Homework", ylab="Mean Math Score") lines(math.gender$homewrk[1:8],math.gender$math[1:8],col="blue") lines(math.gender$homewrk[9:15],math.gender$math[9:15],col="red") legend("topleft",legend=c("Female", "Male"), col=c("blue","red"), lty=1, cex=1.2) # # page 37 # with(nels,table(sex,homewrk)) # # page 32 linear regression math~homewrk + race # ggplot(nels, aes(x=homewrk, y=math, col=gender, type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) text(0,77,'1=Male,',col='red') text(1.5,77,'2=Female',col='lightseagreen') # # Nicer looking with linear regressions # # # Alternate that looks nicer but extends beyond data # plot(math.gender$homewrk,math.gender$math,type="n", main="Regression of Mean Math on Homework \n Seperate lines for genders", xlab="Time Spent Doing Homework", ylab="Mean Math Score", ylim=c(45,70)) abline(lm(math[1:7]~homewrk[1:7],data=nels),col="blue") abline(lm(math[8:14]~homewrk[8:14],data=nels),col="red") legend("topleft",legend=c("Female", "Male"), col=c("blue","red"), lty=1, cex=1.2) # Does not extend beyond the range of data and fits lines to uncollapsed data # --- set up graphic frame: plot(nels$homew,nels$math, type="n", col="blue", lwd=2, main="NELS: Linear Regression by School by Gender \n(for where there is data)", xlab="Time Spent Doing Homework", ylab="Math Scores" ) # --- loop through the schools and add line for (j in nels$sex) { sub <- nels[ which(nels$sex==j),] fitted <- fitted(lm(math~homew,sub)) lines(sub$homew,fitted,col=j) } ############################################## # SES ############################################## # # page 41-42 Lattice plot with regressions # xyplot(math ~ ses | school, data=nels, col.line='black', type=c('p','r'), main='Varability in Math ~ ses relationship') # # page 43-44 Plot 3: Lattice plot with smooth regressions (i.e., loess) # xyplot(math ~ ses | school, data=nels, col.line='black', type=c('p','smooth'), main='Varability in Math ~ ses relationship') # # page 45 All regressions in one figure # ggplot(nels, aes(x=ses, y=math, col=school,type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) + theme(legend.position="none") # Alternate # --- set up graphic frame: par(ask=FALSE) plot(nels$homew,nels$ses, type="n", col="blue", lwd=2, main="NELS: Linear Regression by School \n(for where there is data)", xlab="Socio-econmic status (SES)", ylab="Math Scores", ylim=c(30,70), xlim=c(-2.5,2.5) ) # --- loop through the schools and add line for (j in unique(nels$school)) { sub <- nels[ which(nels$school==j),] sub2 <- sub[order(sub$ses),] fitted <- fitted(lm(math~ses,sub2)) lines(sub2$ses,fitted,col=j) } # # create 10 groups with about the same number per group # nels$ses.grp <- as.numeric(cut(nels$ses, 10)) nels$ses.grp <- as.factor(nels$ses.grp) mean.ses.grp <- aggregate(ses~ses.grp, data=nels, FUN="mean") mean.math.grp<- aggregate(math~ses.grp, data=nels, FUN="mean") # Plot grouped data plot(mean.ses.grp$ses,mean.math.grp$math, type='b', main="Grouped SES: math mean vs ses mean", xlab="Mean SES", ylab="Mean Math", col="blue" ) # Plot math vs SES plot(nels$ses,nels$math,type='p') ###################### Pick up here ######################## # Need to use grouped SES for nicer figures ############################################################ # Page 48: linear regression math ~ ses + gender # nels$gender <- as.factor(nels$sex) ggplot(nels, aes(x=ses, y=math, col=gender, type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) text(0,77,'1=Male,',col='red') text(1.5,77,'2=Female',col='lightseagreen') # -- set up for nicer looking figure nels$ses.grp <- as.numeric(cut(nels$ses, 10)) nels$gender <- as.factor(nels$sex) math.gender <- aggregate(math ~ ses.grp + gender, data=nels, FUN="mean") ses.gender <- aggregate(ses ~ ses.grp + gender, data=nels, FUN="mean") # -- plot nicer one plot(ses.gender$ses[1:10],math.gender$math[1:10],type="l",lwd=2,col="red", main="Join Mean Math on Grouped SES \n Seperate lines for genders", xlab="Socio-econmic Status", ylab="Mean Math Score", ylim=c(30,70), xlim=c(-2.5,2.5)) lines(ses.gender$ses[11:20],math.gender$math[11:20],col="lightseagreen",lwd=2) legend("topleft",legend=c("Male","Female"), col=c("red","lightseagreen"), lty=1, lwd=2, cex=1.2) ## Linear regressions plot(nels$ses,nels$math,type="n", main="Regression of Math on SES \n Seperate lines for genders", xlab="Socio-econmic Status", ylab="Mean Math Score", ylim=c(30,70), xlim=c(-2.5,2.5)) abline(lm(math~ses ,data=nels[which(nels$sex==1),]),col="red",lwd=2) abline(lm(math~ses ,data=nels[which(nels$sex==2),]),col="seagreen",lwd=2) legend("topleft",legend=c("Male","Female"), col=c("red","seagreen"), lty=1, lwd=2, cex=1.2) # # Page 50 linear regression: math ~ ses + white # w <- as.factor(nels$white) math.white <- aggregate(math ~ ses.grp + w, data=nels, FUN="mean") ggplot(math.white, aes(x=ses, y=math, col=w, type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) text(0,77,'1=Non-White,',col='red') text(1.5,77,'2=White',col='lightseagreen') # # Page 52: math ~ race, ses # nels$r <- as.factor(nels$race) ggplot(nels, aes(x=ses, y=math, col=r, type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) text(-2,77,'1=Asian/PI,',col='red') text(-1.1,77,'2=Hispanic',col='darkolivegreen4') text(-.25,77,'3=Black',col='green') text(.4,77,'4=White',col='blue') text(1.5,77,'5=Native Amercian',col='purple') # # Page 52: math ~ sector, ses # nels$sector <- as.factor(nels$public) ggplot(nels, aes(x=ses, y=math, col=sector, type='l'), + geom_smooth(method="lm", se=FALSE ) ) text(0,77,'0=Private,',col='red') text(1.5,77,'1=Public',col='lightseagreen') ### # -- set up for nicer looking figure nels$ses.grp <- as.numeric(cut(nels$ses, 10)) nels$sector <- as.factor(nels$public) math.sector <- aggregate(math ~ ses.grp + sector, data=nels, FUN="mean") ses.sector <- aggregate(ses ~ ses.grp + sector, data=nels, FUN="mean") # -- plot nicer one plot(ses.sector$ses[1:8],math.sector$math[1:8],type="l",lwd=2,col="red", main="Join Mean Math on Grouped SES \n Seperate lines for Sectors", xlab="Socio-econmic Status", ylab="Mean Math Score", ylim=c(30,70), xlim=c(-2.5,2.5)) lines(ses.sector$ses[9:18],math.sector$math[9:18],col="blue",lwd=2) legend("topleft",legend=c("Private","Public"), col=c("red","blue"), lty=1, lwd=2, cex=1.2) # Smoother look using linear regression # -- plot nicer one plot(ses.sector$ses[1:8],math.sector$math[1:8],type="n", main="Linear Regresson Mean Math on SES \n Seperate lines for Sectors", xlab="Socio-econmic Status", ylab="Mean Math Score", ylim=c(30,70), xlim=c(-2.5,2.5)) abline(lm(math~ses ,data=nels[which(nels$public==0),]),col="red",lwd=2) abline(lm(math~ses ,data=nels[which(nels$public==1),]),col="blue",lwd=2) legend("topleft",legend=c("Private","Public"), col=c("red","blue"), lty=1, lwd=2, cex=1.2) ################# school mean centered ses ##################### # # page 55-56 Lattice plot with regressions # xyplot(math ~ schCses | school, data=nels, col.line='black', type=c('p','r'), main='Varability in Math ~ ses relationship') # # Lattice plot with smooth regressions (i.e., loess) # xyplot(math ~ schCses | school, data=nels, col.line='black', type=c('p','smooth'), main='Varability in Math ~ ses relationship') # # page 57 All regressions in one figure # ggplot(nels, aes(x=schCses, y=math, col=school,type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) + theme(legend.position="none") # # page 58 linear regression math~ses + white # w <- as.factor(nels$white) ggplot(nels, aes(x=schCses, y=math, col=w, type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) # # page 64 linear regression math~ses+ sector # sector <- as.factor(public) ggplot(nels, aes(x=schCses, y=math, col=sector, type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) text(0,77,'0=Private,',col='red') text(1.5,77,'1=Public',col='lightseagreen') ######################### homework + sector ########################### # # page 60 linear regression math~homewrk+ sector # nels$sector <- as.factor(nels$public) ggplot(nels, aes(x=homewrk, y=math, col=sector, type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) text(0,77,'0=Private,',col='red') text(1.5,77,'1=Public',col='lightseagreen') ########################## R2 and meta R2 ########################### # # # Regression to each ***************** seems to be working but not same as in notes # # nj <- table(nels$school) # number level 1 units per cluster nj <- as.numeric(nj) nclusters <- length(unique(nels$school)) csum <- cumsum(table(nels$school)) # cumulative frequencies cut2 <- csum # end index cut1 <- c(0, csum) + 1 # start index cut1 <- cut1[1:nclusters] # # Simple regression # ssmodel <- (0) sstotal <- (0) R2 <- (99) for (i in 1:nclusters){ sesBywhite <- nels$schCses[cut1[i]:cut2[i]]*nels$white[cut1[i]:cut2[i]] model0 <- lm(math[cut1[i]:cut2[i]] ~ 1 + homewrk[cut1[i]:cut2[i]] + schCses[cut1[i]:cut2[i]] + white[cut1[i]:cut2[i]] +sex[cut1[i]:cut2[i]] , data=nels ) R2 <- rbind(R2,summary(model0)$r.squared) a <- anova(model0) if (a[4,2] >0) { ssmodel <- ssmodel + a[1,2] + a[2,2] + a[3,2] + a[4,2] } sstotal <- sstotal + sum(a[,2]) } R2meta1 <- ssmodel/sstotal R2.mod1 <- R2[2:length(R2)] plot(nj, R2.mod1, type='p', ylim = c(0, 1), main='Simple model') abline(h=R2meta1, col='blue') text(50,0.95,'R2meta1=.53') # # a Little more complex regression # R2 <- (99) for (i in 1:nclusters){ sesBywhite <- nels$schCses[cut1[i]:cut2[i]]*nels$white[cut1[i]:cut2[i]] model0 <- lm(math[cut1[i]:cut2[i]] ~ 1 + homewrk[cut1[i]:cut2[i]] + schCses[cut1[i]:cut2[i]] + white[cut1[i]:cut2[i]] + sex[cut1[i]:cut2[i]] + sesBywhite, data=nels) R2 <- rbind(R2,summary(model0)$r.squared) } R2.mod2 <- R2[2:length(R2)] plot(nj, R2.mod2, type='p', ylim = c(0, 1), main='A little more complex') # # Compare fits # plot(R2.mod1,R2.mod2, pch=20, ylim=c(0,1), xlim=c(0,1), ylab='more complex', xlab='simpler model', main='The impact of adding gender, mean ses, schsize') abline(a=0,b=1, col='blue') # adds a reference line ################################################################ # Model fitting ################################################################ # Empty/baseline summary(model0 <- lmer(math ~ (1 | school), data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead"))) (icc <- 24.85/(24.85+81.24)) # page 141-145 Preliminary lmm summary( model1 <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public + (1 + homew | school), data=nels, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) screenreg(model1) bic.hlm(model1,nels$school) # page 141-145 Preliminary lmm & and other random effects summary( model1b <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public + (1 + homew + schCses| school), data=nels, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) plot(ranef(model1b),main="Estimated Random Effects",col="blue") bic.hlm(model1b,nels$school)(( summary( model1c <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public + (1 + homew + sex| school), data=nels, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) bic.hlm(model1c,nels$school) plot(ranef(model1c),main="Estimated Random Effects",col="blue") summary( model1d <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public + (1 + homew + white| school), data=nels, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) plot(ranef(model1d),main="Estimated Random Effects",col="blue") bic.hlm(model1d,nels$school) # test random slope---null model summary(model2 <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public + (1 | school), data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead"))) (lr <- 3673.6 - 3598.3 ) (pvalue <- .5*(pchisq(lr,1,lower.tail=FALSE)) + .5*(pchisq(lr,2,lower.tail=FALSE))) # page 151 Sector not significant--remove schMses summary(model3 <- lmer(math ~ homew + schCses + sex + white + schCses*white + public + (1 + homew | school), data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead"))) texreg(list(model3),single.row=TRUE,custom.model.names=c("Why Secter was ns")) anova(model3,model1) bic.hlm(model3,nels$school) bic.hlm(model1,nels$school) ### page 151 For LR test for sex & public summary(model1.reduced <- lmer(math ~ homew + schCses + white + schCses*white + schMses + (1 + homew | school), data=nels, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) anova(model1.reduced,model1) bic.hlm(model1.reduced,nels$school) texreg(list(model1.reduced),single.row=TRUE,custom.model.names=c("Simpler Model")) deviance(model1.reduced) # page 156 summary(model4 <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + (1 + homew | school), data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead"))) # page 157 no ses x white summary(model5 <- lmer(math ~ homew + schCses + white + schMses + (1 + homew | school), data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead"))) # summary of model fits anova(model5,model1.reduced) texreg(list(model5),single.row=TRUE,custom.model.names=c("Final Model")) bic.hlm(model5,nels$school) deviance(model5) screenreg(list(model0,model1,model2,model3,model4,model5)) ############################################################### # Graph of EBLUPs by school +/- 1 sd # ############################################################### # step 1 extract random effects ...this gives a mer object ranU <- as.data.frame(ranef(model5)) # step 2 put in separate objects U0j <- ranU[which(ranU$term=="(Intercept)"),] U1j <- ranU[which(ranU$term=="homew"),] # step 3 sort by ranU which is "condval" U0j <-U0j[order(U0j$condval),] U1j <-U1j[order(U1j$condval),] # step 4 Add intergers for schools so that I can plot in order U0j$xgrp <- seq(1:23) U1j$xgrp <- seq(1:23) # setp 5 Draw graph for U0j plot(U0j$xgrp,U0j$condval,type="p", main="Final Model: Random intercepts +/- 1 std error", xlab="Schools (sort by Uo)", ylab="EBLUP of U0j", col="blue") # step 6 For sd errors, dDraw arrows but with no arrowhead...A hack solution arrows(U0j$xgrp, U0j$condval-U0j$condsd, U0j$xgrp, U0j$condval+U0j$condsd, length=0.00) # Now for U1j: plot(U1j$xgrp,U1j$condval,type="p", main="Final Model: Random slopes +/- 1 std error", xlab="Schools (sort by U1j)", ylab="EBLUP of U1j", col="blue") # step 6 For sd errors, dDraw arrows but with no arrowhead...A hack solution arrows(U1j$xgrp, U1j$condval-U1j$condsd, U1j$xgrp, U1j$condval+U1j$condsd, length=0.00) ############################################################## # qq plot U0j and U1j # ############################################################## qqdata <- qqnorm(U0j$condval) plot(qqdata$x,qqdata$y,type="p",pch=19,col="red", main="Normal QQ plot of U0j with 95% Confidence Bands", xlab="Theoretical Value", ylab="Estimated U0j (EBLUPS)", ylim=c(-15,15)) ylab="Estimated U0j",) arrows(qqdata$x, qqdata$y - 1.96*U0j$condsd, qqdata$x, qqdata$y + 1.96*U0j$condsd, length=0.00, col="gray") qqline(U0j$condval,col="steelblue") qqdata <- qqnorm(U1j$condval) plot(qqdata$x,qqdata$y,type="p",pch=19,col="red", main="Normal QQ plot of Uuj with 95% Confidence Bands", xlab="Theoretical Value", ylab="Estimated Uuj (EBLUPS)", ylim=c(-8,8)) ylab="Estimated U1j",) arrows(qqdata$x, qqdata$y - 1.96*U1j$condsd, qqdata$x, qqdata$y + 1.96*U1j$condsd, length=0.00, col="gray") qqline(U1j$condval,col="steelblue") ############################################################## ###### page 184 residual plots making use of HLMdiag ######### ############################################################## par(mfrow=c(2,2)) res5 <- HLMresid(model5, level=1, type="EB", standardize=TRUE) head(res5) fit <- fitted(model5) # get conditional fitted values plot(fit,res5, xlab='Conditional Fitted Values', ylab='Pearson Std Residuals', main='Conditional Residuals') # # qqplot # qqnorm(res5) # draws plot abline(a=0,b=7.6,col='blue') # reference line # # Histogram of standardized residuals with normal overlay # h<- hist(res5,breaks=15,density=20) # draw historgram xfit <- seq(-20, 20, length=50) # sets range & number of quantiles yfit <- dnorm(xfit, mean=0, sd=6) # should be normal yfit <- yfit*diff(h$mids[1:2])*length(res5) # use mid-points lines(xfit, yfit, col='blue', lwd=2) # draws normal # # Some things to describe the model # plot.new( ) text(.5,1.0,'Model 5') text(.5,0.85,'Devience=3600.1') text(.5,0.65,'AIC=3618.1') text(.5,0.5,'BIC=3656.4') ########## Influence diagnositics using HLMdiag ############# par(mfrow=c(1,1)) cook <- cooks.distance(model5, group="school") dotplot_diag(x=cook, cutoff="internal", name="cooks.distance", ylab=("Cook's distance"), xlab=("School")) mdfit <- mdffits(model5,group="school") dotplot_diag(x=mdfit, cutoff="internal", name="mdffits", ylab=("MDFIts"), xlab=("School")) ########################################################################### # Things from lme4 documentation but not in lecture notes # Should work but doesn't #densityplot(model5) # visualation of confidence intervals: zeta plot2 (see lme4 manual) # blue lines close to linear is good pr5 <- profile(model5) xyplot(pr5, aspect=1.3) xyplot(pr5, aspect=1.3, absVal=TRUE) confint(pr5, level=.99) # Profile pairs --- for visualizing how parameters depend on each other splom(pr5) # prediction intervals on random effects & 95% CIs dotplot(ranef(model5, postVar=TRUE)) qqmath(model5, postVar=TRUE)