# Edps/Psych/Stat 587 # Computer lab 4 template # Spring 2019 # # C.J.Anderson # # # # Packages: # lme4 # lmerTest # lattice # ggplot2 # stringi (this needs to be installed before HLMdiag or # HLMdiag won't load properly) # texreg # optimx # (install) and load packages library(lme4) library(lmerTest) library(lattice) library(ggplot2) library(stringi) library(HLMdiag) library(texreg) library(optimx) ############################################################################# # read in data into R # the first row of the data matrix is the variable names so there is a header # change to your path ############################################################################# lab4<-read.table("D:/Dropbox/edps587/Lab_and_homework/Computer lab4/lab3.txt",header=TRUE) #look at first few rows of data set head(lab4) ################################################## # Set-up -- from before ################################################## # Use if statment to create "boy" lab4$boy = ifelse(lab4$gender=="boy", 1, 0) # Create Third lab4$third <- ifelse(lab4$grade==3, 1, 0) # School mean Center math and school mean math grpMmath <- as.data.frame(aggregate(math~idschool, data=lab4, "mean")) names(grpMmath) <- c('idschool', 'grpMmath') lab4 <- merge(lab4,grpMmath, by=c('idschool')) lab4$grpCmath <- lab4$math - lab4$grpMmath # # make school id a factor variable # lab4$idschool<-as.factor(lab4$idschool) # # Re-scale for latter use # lab4$gcmath <- scale(lab4$grpCmath, center=FALSE, scale=TRUE) lab4$gmmath <- scale(lab4$grpMmath, center=FALSE, scale=TRUE) ################# # # # Comand to pause between figures where ENTER # gives next one. # # par(ask=TRUE) ################ #################################################### # Get random sample of 25 schools #################################################### # # Look at individuals within schools # #################################################### groups <- unique(lab4$idschool)[sample(1:145,25)] subset <- lab4[lab4$idschool%in%groups,] # Plot 2: Lattice plot with regressions xyplot(science ~ math | idschool, data=subset, col.line='black', type=c('p','r'), main='Variability in Science ~ math relationship') # Plot 3: Lattice plot with smooth xyplot(science ~ math | idschool, data=subset, col.line='black', type=c('p','smooth'), main='Variability in Science ~ math relationship') ############################################################ # All regressions in one plot: science ~ math ############################################################ ggplot(lab4, aes(x=math, y=science, col=idschool,type='l'), main='Regressions all in one plot') + geom_smooth(method="lm", se=FALSE ) + theme(legend.position="none") y <- aggregate(science~hoursTV, data=lab4, "mean") y # take a look at them plot(y[,1], y[,2], type='b', ylab='Mean Science', xlab='Hours Watching TV', main='Marginal of Science x HoursTV') # Linear regressions ggplot(lab4, aes(x=hoursTV, y=science, col=idschool,type='l')) + geom_smooth(method="lm", se=FALSE ) + theme(legend.position="none") # smooth regressions ggplot(lab4, aes(x=hoursTV, y=science, col=idschool,type='l')) + geom_smooth(method="loess", se=FALSE ) + theme(legend.position="none") ######################################################## # Only have to do this once for the regressions. # This sets up index to identify each school's data. ######################################################## nclusters <- length(unique(lab4$idschool)) # number of schools nj <- table(lab4$idschool) # number per school nj <- as.data.frame(nj) names(nj) <- c("idschool","nj") # For a simple model: science ~ math ssmodel <- (0) sstotal <- (0) R2 <- matrix(99,nrow=nclusters,ncol=2) # create matrix to hold Rsquares index <-1 for (i in (unique(lab4$idschool))) { sub <- lab4[ which(lab4$idschool==i),] model0 <- lm(science ~ math, data=sub) a <- anova(model0) # need sums of squares ssmodel <- ssmodel + a[1,2] # takes first one fo model sstotal <- sstotal + sum(a[,2]) # takes sum of all SS for total R2[index,1:2] <- as.numeric(cbind(i,summary(model0)$r.squared)) # collect R2 in object R2 index <- index+1 } R2meta <- ssmodel/sstotal R2 <- as.data.frame(R2) names(R2) <- c("idschool","R2") R2.mod1 <- merge(R2,nj,by=c("idschool")) plot(R2.mod1$nj,R2.mod1$R2,type='p', ylim=c(0,1), ylab="R squared", xlab="n_j (number of observations per school)" , main="Science ~ math") # scatter plot abline(h=R2meta,col='blue') # adds horzontal line for R2meta text(70,0.95,'R2meta=.36',col="blue") # text of R2meta's value # # fit regress using hours as numeric: sicence ~ math + hoursTV + hourscomputergames # ssmodel <- (0) sstotal <- (0) R2 <- matrix(99,nrow=nclusters,ncol=2) # create matrix to hold Rsquares index <- 1 for (i in (unique(lab4$idschool))) { sub <- lab4[ which(lab4$idschool==i),] model0 <- lm(science ~ 1 + math + hoursTV +hourscomputergames,data=sub) a <- anova(model0) ssmodel <- ssmodel + a[1,2] + a[2,2] + a[3,2] sstotal <- sstotal + sum(a[,2]) R2[index,1:2]<- as.numeric(cbind(i,summary(model0)$r.squared)) index <- index+1 } R2meta <- ssmodel/sstotal R2 <- as.data.frame(R2) names(R2) <- c("idschool","R2") R2.mod2 <- merge(R2,nj,by=c("idschool")) plot(R2.mod2$nj, R2.mod2$R2, type='p', ylim=c(0,1), ylab="R squared", xlab="n_j (number of observations per school)" , main='Hours TV as numeric Variable') abline(h=R2meta, col='blue') # adds line for R2meta text(70,0.95,'R2meta=.39',col="blue") # text of R2meta value # # makes hours a factor & give new name lab4$hrsTV <- as.factor(lab4$hoursTV) lab4$hrscg <- as.factor(lab4$hourscomputergames) # # Fit regression to each school using hours as factor: science ~ math + hrsTV + hrscg # ssmodel <- (0) sstotal <- (0) R2 <- matrix(99,nrow=nclusters,ncol=2) # create matrix to hold Rsquares index <- 1 for (i in (unique(lab4$idschool))) { sub <- lab4[ which(lab4$idschool==i),] model0 <- lm(science ~ 1 + math + hrsTV+ hrscg,data=sub) a <- anova(model0) ssmodel <- ssmodel + a[1,2] + a[2,2] + a[3,2] sstotal <- sstotal + sum(a[,2]) R2[index,1:2]<- as.numeric(cbind(i,summary(model0)$r.squared)) index <- index+1 } R2meta <- ssmodel/sstotal R2 <- as.data.frame(R2) names(R2) <- c("idschool","R2") R2.mod3 <- merge(R2,nj,by=c("idschool")) plot(R2.mod3$nj, R2.mod3$R2, type='p', ylim=c(0,1), ylab="R squared", xlab="n_j (number of observations per school)" , main='Hours TV as categorical Variable') abline(h=R2meta, col='blue') # adds line for metaR2 text(70,0.95,'R2meta=.47',col="blue") # add text for meta R2 # # Compare models: hours discrete by hours numeric # plot(R2.mod2$R2,R2.mod3$R2, pch=20,cex=1.4, ylim=c(0,1), xlim=c(0,1), ylab='Hours as Discrete', xlab='Hours as Numeric', main='Hours Discrete vs Numeric') abline(a=0,b=1, col='blue') # adds a reference line ############################ # Diagonstic plots ############################ lab4$hoursTV <- as.numeric(lab4$hoursTV) lab4$hrsTV <- ifelse(lab4$hoursTV<5,0,1) table(lab4$hrsTV,lab4$hoursTV) # to check re-coding lab4$hourscomputergames <- as.numeric(lab4$hourscomputergames ) lab4$hrscg <- ifelse(lab4$hourscomputergames<5,0,1) table(lab4$hrscg,lab4$hourscomputergames) # to check re-coding modelxx <- lmer(science ~ 1 + gcmath + gmmath + hrsTV + hrscg + ( 1 + gcmath|idschool), lab4, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead")) plot(modelxx, xlab='Fitted Conditional', ylab='Pearson Residuals') fit <- fitted(modelxx) # get conditional fitted values xfit <- seq(-40, 40, length=50) # sets range & number quantiles yfit <- dnorm(xfit, mean=0, sd=7.177) # should be normal yfit <- yfit*diff(h$mids[1:2])*length(res1)# use mid-points lines(xfit, yfit, col='darkblue', lwd=2) # draws normal mdfit <- mdffits(modelxx,group="idschool") dotplot_diag(x=mdfit, cutoff="internal", name="mdffits", ylab=("MDFIts"), xlab=("School")) screenreg(modelxx)