# Edps/Psych/Stat 587     
# Spring 2019
# Carolyn J. Anderson
#
# This looks useful:  
# https://rstudio-pubs-static.s3.amazonaws.com/78961_fe5b5c6a77f446eca899afbb32bd1dc7.html

# Version 1 of R code to go with notes on Models for Clustered Data
#
# Fitting random intercept model using Nels10 data
#
#   Install and load lme4 and lattice
library(lme4)
library(lattice)

# Change directory to where the data live and then
setwd("D:\\Dropbox\\edps587\\lectures\\2 anovas")

nels10 <- read.table(file="NELS10.txt", header=TRUE)
names(nels10)
head(nels10)
tail(nels10)

# So can just use variable name without also using data table name
attach(nels10)


# simple descriptive statistics
summary(math)
mean(math)
var(math)

#
# Summary statistics by school
#
mn <- aggregate(math~sch_id, data=nels10,  mean)
sd <- aggregate(math~sch_id, data=nels10,  sd)
med <- aggregate(math~sch_id, data=nels10, median)
simple <- cbind(mn, sd[,2], med[,2])
round(simple,digits=2)

#
# plot mean +/- 1 standard deviation by group
#
# -- a numbers 1 to 10 as index variable
simple$index <- seq(1:10)

plot(simple$index, simple$math, pch=19,
     xlab="School",
	 ylab="Math",
	 xlim=c(min(simple$index),max(simple$index)),
	 ylim=c(30,70),
	 main="School Mean Math Score +/- one Standard Deviation"
	 )
lines(rbind(1:10,1:10,NA),rbind(simple$math-simple[,3],simple$math+simple[,3],NA))


# Plain old linear regression (no predictors)
linreg <- lm(math ~ 1, data=nels10)
linreg                           # basic results
anova(linreg)                    # to get ANOVA table
summary(linreg)                  # summary of stuff
plot(linreg, las=1)              # plots fitted by resdiuals

#
# ANOVA using schools (fixed effects)...this is ***REML***
#
id <- as.factor(sch_id)
fixedANOVA <- lm(math ~ id, data=nels10,REML=TRUE)
summary(fixedANOVA)
anova(fixedANOVA)

#
# ANOVA using schools (fixed effects)...this is ***MLE***
# --- are REML and MLE estimates the same or different? 
# REML=FALSE doesn't seem to work (gives the same as
#            REML=TRUE
#
id <- as.factor(sch_id)
fixedANOVA2 <- lm(math ~ id, data=nels10, REML=FALSE)
summary(fixedANOVA2)
anova(fixedANOVA2)

#
# This fits random intercept model using lmer using REML, which is default
#  (random effects ANOVA)
ranintREML <- lmer(math ~ 1 + (1 | sch_id), data=nels10, REML=TRUE)
summary(ranintREML)

#
# This fits random intercept model using lmer using MLE 
# (random effects ANOVA using MLE)
ranintMLE <- lmer(math ~ 1 + (1 | sch_id), data=nels10, REML=FALSE)
summary(ranintMLE)

(icc <- (30.54)/(30.54 + 72.24))

 
# or icc without cutting and pasting (less rounding error)
# -- save the summary into s as a data frame
s <- summary(ranintMLE)
# -- see what available...we want what's in "varcor" as a data frame
names(s)
(s <- as.data.frame(s$varcor))
(icc2 <- s$vcov[1] / (s$vcov[1] + s$vcov[2]))

#
# Simple linear (normal) regression
#
multreg <- lm(math ~ 1 + homework, data=nels10)
summary(multreg)
anova(multreg)
fitmath <- fitted(multreg)
plot(homework, math, 
      type='p', 
      main='Linear Regression of Math on Time Spent Doing Homework',
      xlab='Time Spent Doing Homework', 
      ylab='Fitted Math Scores from Linear Regression')
lines(homework,fitmath,col="blue")

# Lattice plot with linear regressions
xyplot(math ~ homework | id, col.line='black', 
       type=c('p','r'),
       cex.xlab=1.5,
	   cex.ylab=1.5,
	   xlab="Time Spent Doing Homework",
	   ylab="Math Scores",
	   cex.main=1.2,
       main='Variability in Math ~ Homework relationship')

#
#  This basically fits a separate regression line for each school (id)
#
mathbysch <- lm(formula = math ~ 1*id + homework * id, data = nels10)
summary(mathbysch)
fschmath <- fitted(mathbysch)


# Try 2 works much better (it's more work but figure is correct)  
school.list <- unique(id)
length(school.list)


coef <- coefficients(mathbysch)
intercepts <- c(0, coef[3:11])
slopes <- c(0, coef[12:20])


plot(data = nels10, math~homework,type = 'n', 
     ylim = c(min(math),max(math)),
     xlim = c(min(homework),max(homework)),
     cex.main = 1.25,
     xlab = 'Time Spent Doing Homework', ylab = "Math Scores",
     main = "Separate Regression for Each School")
	 
colors <- c("red", "orange", "darkgoldenrod1","magenta",
                      "green", "blue", "purple",
                      "white", "grey", "black")
for(i in 1:length(school.list)){
  abline(a = (coef[1] + intercepts[i]), b = (coef[2]+slopes[i]),
              col = colors[i])
  par<- par(new=F)
}


#
# Some graphs/plots
#
# Random intercept and Slope  HLM 
#  ---- this is analogous to fitting regression to each school but the school effects are
#  random.  The black line is the average school and the blue ones are the conditional
#  ones (i.e., they include school specific estimates of intercept & slope)
#
#
model2 <- lmer(math ~ homework + ( homework | id), data=nels10, REML=FALSE)
summary(model2)

school.list <- unique(id)
length(school.list)

fixed.effects <- fixef(model2)
rand.effects <- ranef(model2)
params.student <- cbind((rand.effects$id[1]+fix[1]),(rand.effects$id[2]+fix[2]))

plot(data = nels, math~homework,type = 'n', 
     ylim = c(min(math),max(math)),
     xlim = c(min(homework),max(homework)),
     cex.main = .75,
     xlab = 'Time Spent Doing Homework', ylab = "Math Scores",
     main = "Estimated Conditional Models from Random Intercepts & Slope Model")

abline(a=fixed.effects[1], b= fixed.effects[2], col='black', lwd=2)

for(i in 1:length(school.list)){
  abline(a = params.student[i,1], b = params.student[i,2],col = 'blue')
  par<- par(new=F)
}