# Edps/Psyc/Stat 587
# Spring 2019
# C.J.Anderson
#
# Model building  
# NELS 23 data --- most of the graphs in the lecture graphing fixed effects 
#
#  Note regarding Packages:
#     stringi    (this needs to be installed before HLMdiag or HLMdiag won't load properly)
#     HLMdiag 
#
# 
# load packages
#
library(lme4)
library(lmerTest)
library(lattice)
library(ggplot2)
library(stringi)

library(HLMdiag)
library(texreg)
library(optimx)

source("D:\\Dropbox\\edps587\\lectures\\6 inference1\\All_functions.txt")

#############################################################################
# read in data into R
# the first row of the data matrix is the variable names so there is a header
# change to your path
#############################################################################

nels<-read.table("D:\\Dropbox\\edps587\\lectures\\8 modelbuilding\\nels23_data.txt",header=TRUE)

#look at first few rows of data set
head(nels)

#
#  Set up
#
nels$public <- ifelse (nels$schtype==1, 1, 0)
nels$white  <- ifelse (nels$race==4, 1, 0)


schMses <- as.data.frame(aggregate(ses~school, data=nels, FUN="mean"))
names(schMses) <- c('school', 'schMses')
nels <- merge(nels,schMses, by=c('school'))
nels$schCses <- nels$ses - nels$schMses

nels$homewrk <- as.numeric(nels$homew)
nels$school  <- as.factor(nels$school)

# I think I am done with data set up (at least for now)

###############################################################
#  Ask for next graph --- "enter" to proceed
###############################################################

par(ask=TRUE)

######################### homework #############################
#
#  lm and overall 95% CI
#
math.homewrk <- lm(math~homewrk, data=nels)
conflm1<-confint(math.homewrk)
plot(nels$homewrk,nels$math,type='p', main='Overall regression with 95% CI')
abline(math.homewrk)
abline(coef=conflm1[,1],lty=2)
abline(coef=conflm1[,2],lty=2)


#
#  Smooth (loess) curve fit to data
#  
# --- fit loess to data
lw1 <- loess(math ~ homewrk,data=nels)
# --- plot data
plot(nels$homewrk,nels$math,
	 main="NELS:Math Score Plotted vs Homework \n with smooth (loess) curve",
	 xlab="Time Spent Doing homework",
	 ylab="Math Scores",
	 cex=1.0,
	 col="black"
	 )	
# --- put in proper order
	j <- order(nels$homewrk)
# --- Add loess to figure	
lines(nels$homewrk[j],lw1$fitted[j],col="blue",lwd=3)

#
# Joint points for each school 
#  ################################
#
# ~ page 25  compute mean and plot by school just connecting points
#
math.hmwk <- aggregate(math ~ homewrk + school, data=nels, FUN="mean")

ggplot(math.hmwk, aes(x=homewrk, y=math, col=school,type='l'),
  main='Regressions all in one plot')  + geom_line() +  theme(legend.position="none")

# --- alternate method 
plot(nels$homewrk,nels$math,
	 main="NELS:Math Score Plotted vs Homework \n joining means for each school",
	 xlab="Time Spent Doing homework",
	 ylab="School Mean Math Scores",
	 cex=1.0,
	 col="black",
	 type="n"
	 )	
# --- put in proper order
	j <- order(math.hmwk$homewrk)
# --- Add to figure	
for (i in unique(math.hmwk$school)) {
  sub <- math.hmwk[which(math.hmwk$school==i),]
  lines(sub$homewrk,sub$math,col=i)
 } 
 

#
# page 32 linear regression math~homewrk + race
#
#
#   Plot 1:  Lattice plot --- math
#
xyplot(math ~ homew | school, data=nels, col.line='black', 
                      type=c('p'),
                      main='Varability in Math ~ homework relationship')
#
# page 19 - 20 Plot 2:  Lattice plot with regressions
#
xyplot(math ~ homew | school, data=nels, col.line='black', 
                      type=c('p','r'),
                      main='Varability in Math ~ homework relationship')
#
# page 23-24  Plot 3:  Lattice plot with smooth regressions  (i.e., loess)
#
xyplot(math ~ homew | school, data=nels, col.line='black', 
                      type=c('p','smooth'),
                      main='Varability in Math ~ homework relationship')
#
#  compute means and plot
#
mathhmwk <- aggregate(math ~ homewrk, data=nels, FUN="mean")
plot(mathhmwk[,1],mathhmwk[,2], type='b',
     xlab="Time Spent Doing Homework",
	 ylab="Mean Math Score",
     main="NELS: Overal Means for Each Level Homework")

#
# page 25  compute mean and plot by school just connecting points
#
math.hmwk <- aggregate(math ~ homewrk + school, data=nels, FUN="mean")

ggplot(math.hmwk, aes(x=homewrk, y=math, col=school,type='l'),
  main='Regressions all in one plot')  + geom_line() +  theme(legend.position="none")
#

#
# page 26  All regressions in one figure
#
ggplot(nels, aes(x=homewrk, y=math, col=school,type='l'),
       main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) +  theme(legend.position="none")

# Alternate code to put all regressions in a single plot:
#  --- set up graphic frame:
plot(nels$homew,nels$math,
	type="n", 
	col="blue",
	lwd=2,
	main="NELS: Linear Regression by School \n(for where there is data)",
	xlab="Time Spent Doing Homework",
	ylab="Math Scores"
	)
# --- loop through the schools and add line
for (j in unique(nels$school)) {
  sub <- nels[ which(nels$school==j),]
  fitted <- fitted(lm(math~homew,sub))
  lines(sub$homew,fitted,col=j)
}   

#
# page 28  compute mean and plot by white
#
w <- as.factor(nels$white)
math.white <- aggregate(math ~ homewrk + w, data=nels, FUN="mean")

ggplot(math.white, aes(x=homewrk, y=math, col=w, type='l'),
            main='Regressions all in one plot')  + geom_line() 
#
# Alternate that looks nicer:
plot(math.white$homewrk[1:7],math.white$math[1:7],type="l",
     main="Mean Math by Homework",
	 xlab="Time Spent Doing Homework",
	 ylab="Mean Math Score",
	 col="blue")
lines(math.white$homewrk[8:14],math.white$math[8:14],type="l",col="red")
legend("topleft",legend=c("Non-White", "White"),
       col=c("blue","red"), lty=1, cex=1.2)   
#
# page 29 linear regression math~homewrk + white
#
ggplot(math.white, aes(x=homewrk, y=math, col=w, type='l'),
             + geom_smooth(method="lm", se=FALSE ) )
#
# Alternate that looks nicer--- regression on means math and homework by white
#
plot(math.white$homewrk,math.white$math,type="n",
     main="Regression of Math on Homework",
	 xlab="Time Spent Doing Homework",
	 ylab="Mean Math Score",
	 ylim=c(40,70))
subw <- math.white[which(math.white$w==0),]
lm1 <- lm(math ~ homewrk,data=subw)
lines(subw$homewrk,fitted(lm1),col="red")
subw <- math.white[which(math.white$w==1),]
lm2 <- lm(math ~ homewrk,data=subw)
lines(subw$homewrk,fitted(lm2),col="blue")
legend("topleft",legend=c("Non-White", "White"),
       col=c("red","blue"), lty=1, cex=1.2)   
 
#
# Alternate that looks nicer--- regression to whole uncollapsed data set
#
plot(math.white$homewrk,math.white$math,type="n",
     main="Regression of Math on Homework",
	 xlab="Time Spent Doing Homework",
	 ylab="Mean Math Score",
	 ylim=c(40,70))
subw <- nels[which(nels$white==1),]
abline(lm(math~homewrk,data=subw),col="blue")
subn <- nels[which(nels$white==0),]
abline(lm(math~homewrk,data=subn),col="red")
legend("topleft",legend=c("Non-White", "White"),
       col=c("red","blue"), lty=1, cex=1.2)   
}
#
#  page 31  compute mean and plot by race
#
r <- as.factor(nels$race)
math.race <- aggregate(math ~ homewrk + r, data=nels, FUN="mean")

ggplot(math.race, aes(x=homewrk, y=math, col=r, type='l'),
            main='Regressions all in one plot')  + geom_line() 
text(0,77,'1=Asian/PI,',col='red')
text(1.5,77,'2=Hispanic',col='darkolivegreen4')
text(2.75,77,'3=Black',col='green')
text(3.75,77,'4=White',col='blue')
text(5.25,77,'5=Native Amercian',col='purple')

#
# Alternate that looks nicer
#
plot(math.race$homewrk[1:5],math.race$math[1:5],type="l",
     main="Mean Math by Homework",
	 xlab="Time Spent Doing Homework",
	 ylab="Mean Math Score",
	 ylim=c(30,70),
	 xlim=c(0,7),
	 col="blue")
lines(math.race$homewrk[6:11],math.race$math[6:11],type="l",col="red")
lines(math.race$homewrk[12:18],math.race$math[12:18],type="l",col="darkgreen")
lines(math.race$homewrk[19:26],math.race$math[19:26],type="l",col="purple")
lines(math.race$homewrk[27:28],math.race$math[27:28],type="l",col="cyan")
legend("topleft",legend=c("Asian/PI", "Hispanic","Black","White","Native Amercian"),
       col=c("blue","red","darkgreen","purple","cyan"), lty=1, cex=1.2)   

#
# page 32 linear regression math~homewrk + race
#
ggplot(nels, aes(x=homewrk, y=math, col=r, type='l'),
            main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) 
text(0,77,'1=Asian/PI,',col='red')
text(1.5,77,'2=Hispanic',col='darkolivegreen4')
text(2.75,77,'3=Black',col='darkgreen')
text(3.75,77,'4=White',col='blue')
text(5.25,77,'5=Native Amercian',col='purple')

#
# Nicer version but extends beyond range where there is data
#
par(mfrow=c(1,1))
plot(math.race$homewrk[1:5],math.race$math[1:5],type="n",
     main="Mean Math by Homework",
	 xlab="Time Spent Doing Homework",
	 ylab="Mean Math Score",
	 ylim=c(30,70),
	 xlim=c(0,7))
abline(lm(math.race$math[1:5]~math.race$homewrk[1:5]),type="l",col="blue")
abline(lm(math.race$math[6:11]~math.race$homewrk[6:11]),type="l",col="red")
abline(lm(math.race$math[12:18]~math.race$homewrk[12:18]),type="l",col="darkgreen")
abline(lm(math.race$math[19:26]~math.race$homewrk[19:26]),type="l",col="purple")
abline(lm(math.race$math[27:28]~math.race$homewrk[27:28]),type="l",col="cyan")
legend("topleft",legend=c("Asian/PI", "Hispanic","Black","White","Native Amercian"),
       col=c("blue","red","darkgreen","purple","cyan"), lty=1, cex=1.2)   

#
# page 34  race x homework  to show where there is not much data
#
par(ask=FALSE)
# Does not extend beyond the range of data
#  --- set up graphic frame:
plot(nels$homew,nels$math,
	type="n", 
	col="blue",
	lwd=2,
	main="NELS: Linear Regression by School \n(for where there is data)",
	xlab="Time Spent Doing Homework",
	ylab="Math Scores"
	)
# --- loop through the schools and add line
for (j in nels$race) {
  sub <- nels[ which(nels$race==j),]
  fitted <- fitted(lm(math~homew,sub))
  lines(sub$homew,fitted,col=j)
}
legend("topleft",legend=c("Asian/PI", "Hispanic","Black","White","Native Amercian"),
       col=c("blue","red","darkgreen","purple","cyan"), lty=1, cex=1.2)   
   
  
#
#  Cross-classification of homew x race
#
with(nels, table(homew, race))
 

#
#  page 36 mean math by gender
#
nels$gender <- as.factor(nels$sex)
math.gender <- aggregate(math ~ homewrk + gender, data=nels, FUN="mean")

ggplot(math.gender, aes(x=homewrk, y=math, col=gender, type='l'),
            main='Regressions all in one plot')  + geom_line() 
text(0,77,'1=Male,',col='red')
text(1.5,77,'2=Female',col='lightseagreen')

#
# Nicer looking where fit lines to means
#
plot(math.gender$homewrk,math.gender$math,type="n",
     main="Regression of Math on Homework \n Seperate lines for genders",
	 xlab="Time Spent Doing Homework",
	 ylab="Mean Math Score")
lines(math.gender$homewrk[1:8],math.gender$math[1:8],col="blue")
lines(math.gender$homewrk[9:15],math.gender$math[9:15],col="red")
legend("topleft",legend=c("Female", "Male"),
       col=c("blue","red"), lty=1, cex=1.2)   

   
	   
	   
#
# page 37
#

with(nels,table(sex,homewrk))


#
# page 32 linear regression math~homewrk + race
#
ggplot(nels, aes(x=homewrk, y=math, col=gender, type='l'), 
            main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) 
text(0,77,'1=Male,',col='red')
text(1.5,77,'2=Female',col='lightseagreen')
#
# Nicer looking with linear regressions
#
#
# Alternate that looks nicer but extends beyond data
#
plot(math.gender$homewrk,math.gender$math,type="n",
     main="Regression of Mean Math on Homework \n Seperate lines for genders",
	 xlab="Time Spent Doing Homework",
	 ylab="Mean Math Score",
	 ylim=c(45,70))
abline(lm(math[1:7]~homewrk[1:7],data=nels),col="blue")
abline(lm(math[8:14]~homewrk[8:14],data=nels),col="red")
legend("topleft",legend=c("Female", "Male"),
       col=c("blue","red"), lty=1, cex=1.2)   

# Does not extend beyond the range of data and fits lines to uncollapsed data
#  --- set up graphic frame:
plot(nels$homew,nels$math,
	type="n", 
	col="blue",
	lwd=2,
	main="NELS: Linear Regression by School by Gender \n(for where there is data)",
	xlab="Time Spent Doing Homework",
	ylab="Math Scores"
	)
# --- loop through the schools and add line
for (j in nels$sex) {
  sub <- nels[ which(nels$sex==j),]
  fitted <- fitted(lm(math~homew,sub))
  lines(sub$homew,fitted,col=j)
}  
	
##############################################
# SES 
##############################################

#
# page 41-42 Lattice plot with regressions
#
xyplot(math ~ ses | school, data=nels, col.line='black', 
                      type=c('p','r'),
                      main='Varability in Math ~ ses relationship')
  
 
#
# page 43-44  Plot 3:  Lattice plot with smooth regressions  (i.e., loess)
#
xyplot(math ~ ses | school, data=nels, col.line='black', 
                      type=c('p','smooth'),
                      main='Varability in Math ~ ses relationship')


#
# page 45  All regressions in one figure
#

ggplot(nels, aes(x=ses, y=math, col=school,type='l'),
       main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) +  theme(legend.position="none")

# Alternate 
#  --- set up graphic frame:
par(ask=FALSE)
plot(nels$homew,nels$ses,
	type="n", 
	col="blue",
	lwd=2,
	main="NELS: Linear Regression by School \n(for where there is data)",
	xlab="Socio-econmic status (SES)",
	ylab="Math Scores",
	ylim=c(30,70),
	xlim=c(-2.5,2.5)
	)
# --- loop through the schools and add line
for (j in unique(nels$school)) {
  sub <- nels[ which(nels$school==j),]
  sub2 <- sub[order(sub$ses),]
  fitted <- fitted(lm(math~ses,sub2))
  lines(sub2$ses,fitted,col=j)
}     
	

#
#  create 10 groups with about the same number per group
#
nels$ses.grp <- as.numeric(cut(nels$ses, 10))
nels$ses.grp <- as.factor(nels$ses.grp)

mean.ses.grp <- aggregate(ses~ses.grp, data=nels, FUN="mean")
mean.math.grp<- aggregate(math~ses.grp, data=nels, FUN="mean")

# Plot grouped data
plot(mean.ses.grp$ses,mean.math.grp$math, type='b',
     main="Grouped SES:  math mean vs ses mean",
	 xlab="Mean SES",
	 ylab="Mean Math",
	 col="blue"	 )

# Plot math vs SES
plot(nels$ses,nels$math,type='p')	 
	 
###################### Pick up here ########################
#   Need to use grouped SES for nicer figures
############################################################
#  Page 48: linear regression math ~ ses + gender
#
nels$gender <- as.factor(nels$sex)

ggplot(nels, aes(x=ses, y=math, col=gender, type='l'), 
            main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) 
text(0,77,'1=Male,',col='red')
text(1.5,77,'2=Female',col='lightseagreen')

# -- set up for nicer looking figure
nels$ses.grp <- as.numeric(cut(nels$ses, 10))
nels$gender <- as.factor(nels$sex)
math.gender <- aggregate(math ~ ses.grp + gender, data=nels, FUN="mean")
ses.gender <- aggregate(ses ~ ses.grp +  gender, data=nels, FUN="mean")  
# -- plot nicer one
plot(ses.gender$ses[1:10],math.gender$math[1:10],type="l",lwd=2,col="red",
     main="Join Mean Math on Grouped SES \n Seperate lines for genders",
	 xlab="Socio-econmic Status",
	 ylab="Mean Math Score",
	 ylim=c(30,70),
	 xlim=c(-2.5,2.5))
lines(ses.gender$ses[11:20],math.gender$math[11:20],col="lightseagreen",lwd=2)
legend("topleft",legend=c("Male","Female"),
       col=c("red","lightseagreen"), lty=1, lwd=2, cex=1.2)   

## Linear regressions
plot(nels$ses,nels$math,type="n",
     main="Regression of Math on SES \n Seperate lines for genders",
	 xlab="Socio-econmic Status",
	 ylab="Mean Math Score",
	 ylim=c(30,70),
	 xlim=c(-2.5,2.5))
abline(lm(math~ses ,data=nels[which(nels$sex==1),]),col="red",lwd=2)
abline(lm(math~ses ,data=nels[which(nels$sex==2),]),col="seagreen",lwd=2)
legend("topleft",legend=c("Male","Female"),
       col=c("red","seagreen"), lty=1, lwd=2, cex=1.2)   

#
#  Page 50 linear regression:  math ~ ses + white
#
w <- as.factor(nels$white)
math.white <- aggregate(math ~ ses.grp + w, data=nels, FUN="mean")

ggplot(math.white, aes(x=ses, y=math, col=w, type='l'),
            main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) 

text(0,77,'1=Non-White,',col='red')
text(1.5,77,'2=White',col='lightseagreen')

#
# Page 52:  math ~ race, ses 
#
nels$r <- as.factor(nels$race)
ggplot(nels, aes(x=ses, y=math, col=r, type='l'),
            main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) 
text(-2,77,'1=Asian/PI,',col='red')
text(-1.1,77,'2=Hispanic',col='darkolivegreen4')
text(-.25,77,'3=Black',col='green')
text(.4,77,'4=White',col='blue')
text(1.5,77,'5=Native Amercian',col='purple')


#
# Page 52:  math ~ sector, ses 
#
nels$sector <- as.factor(nels$public)

ggplot(nels, aes(x=ses, y=math, col=sector, type='l'), + geom_smooth(method="lm", se=FALSE ) )
text(0,77,'0=Private,',col='red')
text(1.5,77,'1=Public',col='lightseagreen')

###
# -- set up for nicer looking figure
nels$ses.grp <- as.numeric(cut(nels$ses, 10))
nels$sector <- as.factor(nels$public)
math.sector <- aggregate(math ~ ses.grp + sector, data=nels, FUN="mean")
ses.sector <- aggregate(ses ~ ses.grp +  sector, data=nels, FUN="mean")  
# -- plot nicer one
plot(ses.sector$ses[1:8],math.sector$math[1:8],type="l",lwd=2,col="red",
     main="Join Mean Math on Grouped SES \n Seperate lines for Sectors",
	 xlab="Socio-econmic Status",
	 ylab="Mean Math Score",
	 ylim=c(30,70),
	 xlim=c(-2.5,2.5))
lines(ses.sector$ses[9:18],math.sector$math[9:18],col="blue",lwd=2)
legend("topleft",legend=c("Private","Public"),
       col=c("red","blue"), lty=1, lwd=2, cex=1.2)   


# Smoother look using linear regression
# -- plot nicer one
plot(ses.sector$ses[1:8],math.sector$math[1:8],type="n",
     main="Linear Regresson Mean Math on SES \n Seperate lines for Sectors",
	 xlab="Socio-econmic Status",
	 ylab="Mean Math Score",
	 ylim=c(30,70),
	 xlim=c(-2.5,2.5))
abline(lm(math~ses ,data=nels[which(nels$public==0),]),col="red",lwd=2)
abline(lm(math~ses ,data=nels[which(nels$public==1),]),col="blue",lwd=2)
legend("topleft",legend=c("Private","Public"),
       col=c("red","blue"), lty=1, lwd=2, cex=1.2)   



################# school mean centered ses #####################

#
# page 55-56 Lattice plot with regressions
#
xyplot(math ~ schCses | school, data=nels, col.line='black', 
                      type=c('p','r'),
                      main='Varability in Math ~ ses relationship')
  
 
#
# Lattice plot with smooth regressions  (i.e., loess)
#
xyplot(math ~ schCses | school, data=nels, col.line='black', 
                      type=c('p','smooth'),
                      main='Varability in Math ~ ses relationship')


#
# page 57  All regressions in one figure
#

ggplot(nels, aes(x=schCses, y=math, col=school,type='l'),
       main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) +  theme(legend.position="none")



#
# page 58 linear regression math~ses + white
#
w <- as.factor(nels$white)
ggplot(nels, aes(x=schCses, y=math, col=w, type='l'),
            main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) 


#
# page 64 linear regression math~ses+ sector
#
sector <- as.factor(public)
ggplot(nels, aes(x=schCses, y=math, col=sector, type='l'),
            main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) 
text(0,77,'0=Private,',col='red')
text(1.5,77,'1=Public',col='lightseagreen')




######################### homework + sector ###########################

#
# page 60 linear regression math~homewrk+ sector
#
nels$sector <- as.factor(nels$public)
ggplot(nels, aes(x=homewrk, y=math, col=sector, type='l'),
            main='Regressions all in one plot')  + geom_smooth(method="lm", se=FALSE ) 
text(0,77,'0=Private,',col='red')
text(1.5,77,'1=Public',col='lightseagreen')



########################## R2 and meta R2 ###########################

#
#
# Regression to each  ***************** seems to be working but not same as in notes 
#
#
nj <- table(nels$school)              # number level 1 units per cluster
nj <- as.numeric(nj)
nclusters <- length(unique(nels$school))
csum <- cumsum(table(nels$school))      # cumulative frequencies

cut2 <- csum                       # end index
cut1 <- c(0, csum) + 1             # start index
cut1 <- cut1[1:nclusters]

#
#  Simple regression
#
ssmodel <- (0)
sstotal <- (0)
R2 <- (99)
for (i in 1:nclusters){
     sesBywhite <- nels$schCses[cut1[i]:cut2[i]]*nels$white[cut1[i]:cut2[i]]

    model0 <- lm(math[cut1[i]:cut2[i]] ~ 1 + homewrk[cut1[i]:cut2[i]] + schCses[cut1[i]:cut2[i]]
                                           + white[cut1[i]:cut2[i]]   +sex[cut1[i]:cut2[i]] , data=nels  )
    R2 <- rbind(R2,summary(model0)$r.squared)
    a <- anova(model0)
    if (a[4,2] >0) {
    ssmodel <- ssmodel + a[1,2] + a[2,2] + a[3,2] + a[4,2] }
    sstotal <- sstotal + sum(a[,2])

}

R2meta1 <- ssmodel/sstotal
R2.mod1   <- R2[2:length(R2)]
plot(nj, R2.mod1, type='p',
     ylim = c(0, 1),
     main='Simple model')
abline(h=R2meta1, col='blue')
text(50,0.95,'R2meta1=.53')



#
#  a Little more complex regression
#
R2 <- (99)
for (i in 1:nclusters){
     sesBywhite <- nels$schCses[cut1[i]:cut2[i]]*nels$white[cut1[i]:cut2[i]]
     model0 <- lm(math[cut1[i]:cut2[i]] ~ 1 + homewrk[cut1[i]:cut2[i]] + schCses[cut1[i]:cut2[i]]
                                            + white[cut1[i]:cut2[i]]   + sex[cut1[i]:cut2[i]]   + sesBywhite, data=nels)
    R2 <- rbind(R2,summary(model0)$r.squared)
       }



R2.mod2   <- R2[2:length(R2)]
plot(nj, R2.mod2, type='p',
     ylim = c(0, 1),
     main='A little more complex')

#
# Compare fits
#

plot(R2.mod1,R2.mod2, pch=20,
     ylim=c(0,1),
     xlim=c(0,1),
     ylab='more complex',
     xlab='simpler model',
     main='The impact of adding gender, mean ses, schsize')
abline(a=0,b=1, col='blue')                 # adds a reference line


################################################################
# Model fitting
################################################################

# Empty/baseline
summary(model0 <- lmer(math ~ (1  | school), data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead")))
(icc <- 24.85/(24.85+81.24))

# page 141-145 Preliminary lmm
summary( model1 <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public 
                 + (1 + homew | school), data=nels, REML=FALSE,
				 control = lmerControl(optimizer ="Nelder_Mead")))
screenreg(model1)
bic.hlm(model1,nels$school)

# page 141-145 Preliminary lmm & and other random effects
summary( model1b <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public 
                 + (1 + homew + schCses| school), data=nels, REML=FALSE,
				 control = lmerControl(optimizer ="Nelder_Mead")))
plot(ranef(model1b),main="Estimated Random Effects",col="blue")
bic.hlm(model1b,nels$school)((


summary( model1c <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public 
                 + (1 + homew + sex| school), data=nels, REML=FALSE,
				 control = lmerControl(optimizer ="Nelder_Mead")))
bic.hlm(model1c,nels$school)
plot(ranef(model1c),main="Estimated Random Effects",col="blue")


summary( model1d <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public 
                 + (1 + homew + white| school), data=nels, REML=FALSE,
				 control = lmerControl(optimizer ="Nelder_Mead")))
plot(ranef(model1d),main="Estimated Random Effects",col="blue")
bic.hlm(model1d,nels$school)

				 
				 
# test random slope---null model
summary(model2 <- lmer(math ~ homew + schCses + sex + white + schCses*white + schMses + public + (1 | school), data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead")))
(lr <- 3673.6 - 3598.3  )
(pvalue <- .5*(pchisq(lr,1,lower.tail=FALSE)) + .5*(pchisq(lr,2,lower.tail=FALSE)))

# page 151 Sector not significant--remove schMses
summary(model3 <- lmer(math ~ homew + schCses + sex + white + schCses*white  + public + (1 + homew | school), 
       data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead")))

texreg(list(model3),single.row=TRUE,custom.model.names=c("Why Secter was ns"))
anova(model3,model1)
bic.hlm(model3,nels$school)
bic.hlm(model1,nels$school)

### page 151 For LR test for sex & public
summary(model1.reduced <- lmer(math ~ homew + schCses + white + schCses*white + schMses 
                 + (1 + homew | school), data=nels, REML=FALSE,
				 control = lmerControl(optimizer ="Nelder_Mead")))
anova(model1.reduced,model1)
bic.hlm(model1.reduced,nels$school)
texreg(list(model1.reduced),single.row=TRUE,custom.model.names=c("Simpler Model"))
deviance(model1.reduced)


# page 156 
summary(model4 <- lmer(math ~ homew + schCses + sex + white + schCses*white  + schMses + (1 + homew | school),  data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead")))


# page 157 no ses x white
summary(model5 <- lmer(math ~ homew + schCses + white + schMses + (1 + homew | school), 
         data=nels, REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead")))

# summary of model fits
anova(model5,model1.reduced)
texreg(list(model5),single.row=TRUE,custom.model.names=c("Final Model"))
bic.hlm(model5,nels$school)
deviance(model5)

screenreg(list(model0,model1,model2,model3,model4,model5))


###############################################################
# Graph of EBLUPs by school +/- 1 sd                          #
###############################################################
# step 1  extract random effects ...this gives a mer object
ranU <- as.data.frame(ranef(model5))

# step 2 put in separate objects
U0j <- ranU[which(ranU$term=="(Intercept)"),]
U1j <- ranU[which(ranU$term=="homew"),]

# step 3 sort by ranU which is "condval"
U0j <-U0j[order(U0j$condval),]
U1j <-U1j[order(U1j$condval),]

# step 4 Add intergers for schools so that I can plot in order
U0j$xgrp <- seq(1:23)
U1j$xgrp <- seq(1:23)

# setp 5 Draw graph for U0j
plot(U0j$xgrp,U0j$condval,type="p",
     main="Final Model: Random intercepts +/- 1 std error",
	 xlab="Schools (sort by Uo)",
	 ylab="EBLUP of U0j",
	 col="blue")
	 
# step 6 For sd errors, dDraw arrows but with no arrowhead...A hack solution
arrows(U0j$xgrp, U0j$condval-U0j$condsd,  
       U0j$xgrp, U0j$condval+U0j$condsd, 
	   length=0.00)

# Now for U1j:
plot(U1j$xgrp,U1j$condval,type="p",
     main="Final Model: Random slopes +/- 1 std error",
	 xlab="Schools (sort by U1j)",
	 ylab="EBLUP of U1j",
	 col="blue")
	 
# step 6 For sd errors, dDraw arrows but with no arrowhead...A hack solution
arrows(U1j$xgrp, U1j$condval-U1j$condsd,  
       U1j$xgrp, U1j$condval+U1j$condsd, 
	   length=0.00)
	   
##############################################################
# qq plot U0j and U1j                                        #
##############################################################
qqdata <- qqnorm(U0j$condval)
plot(qqdata$x,qqdata$y,type="p",pch=19,col="red",
main="Normal QQ plot of U0j with 95% Confidence Bands",
     xlab="Theoretical Value",
     ylab="Estimated U0j (EBLUPS)",
     ylim=c(-15,15))
ylab="Estimated U0j",)
arrows(qqdata$x, qqdata$y - 1.96*U0j$condsd, 
       qqdata$x, qqdata$y + 1.96*U0j$condsd, 
	length=0.00, col="gray")
qqline(U0j$condval,col="steelblue")

qqdata <- qqnorm(U1j$condval)
plot(qqdata$x,qqdata$y,type="p",pch=19,col="red",
main="Normal QQ plot of Uuj with 95% Confidence Bands",
     xlab="Theoretical Value",
     ylab="Estimated Uuj (EBLUPS)",
     ylim=c(-8,8))
ylab="Estimated U1j",)
arrows(qqdata$x, qqdata$y - 1.96*U1j$condsd, 
       qqdata$x, qqdata$y + 1.96*U1j$condsd, 
	length=0.00, col="gray")
qqline(U1j$condval,col="steelblue")


##############################################################
###### page 184 residual plots making use of HLMdiag #########
##############################################################

par(mfrow=c(2,2))

res5 <- HLMresid(model5, level=1, type="EB", standardize=TRUE)
head(res5)


fit <- fitted(model5)                          # get conditional fitted values

plot(fit,res5, 
     xlab='Conditional Fitted Values',
     ylab='Pearson Std Residuals',
     main='Conditional Residuals')

#
# qqplot
#

qqnorm(res5)                               # draws plot
abline(a=0,b=7.6,col='blue')                 # reference line

#
# Histogram of standardized residuals with normal overlay
#

h<- hist(res5,breaks=15,density=20)              # draw historgram
xfit <- seq(-20, 20, length=50)                    # sets range & number of quantiles
yfit <- dnorm(xfit, mean=0, sd=6)            # should be normal
yfit <- yfit*diff(h$mids[1:2])*length(res5) # use mid-points 
lines(xfit, yfit, col='blue', lwd=2)             # draws normal


#
#  Some things to describe the model
#

plot.new( ) 
text(.5,1.0,'Model 5')
text(.5,0.85,'Devience=3600.1')
text(.5,0.65,'AIC=3618.1')
text(.5,0.5,'BIC=3656.4')


########## Influence diagnositics using HLMdiag #############

par(mfrow=c(1,1))

cook <- cooks.distance(model5, group="school")

dotplot_diag(x=cook, cutoff="internal", 
name="cooks.distance", ylab=("Cook's distance"), xlab=("School"))


mdfit <- mdffits(model5,group="school")

dotplot_diag(x=mdfit, cutoff="internal", name="mdffits", 
	ylab=("MDFIts"), xlab=("School"))




###########################################################################
# Things from lme4 documentation but not in lecture notes

# Should work but doesn't
#densityplot(model5)

# visualation of confidence intervals:  zeta plot2 (see lme4 manual)

# blue lines close to linear is good

pr5 <- profile(model5)

xyplot(pr5, aspect=1.3)

xyplot(pr5, aspect=1.3, absVal=TRUE)

confint(pr5, level=.99)


# Profile pairs --- for visualizing how parameters depend on each other
splom(pr5)

# prediction intervals on random effects & 95% CIs
dotplot(ranef(model5, postVar=TRUE))

qqmath(model5, postVar=TRUE)