# This looks useful:  
# https://rstudio-pubs-static.s3.amazonaws.com/78961_fe5b5c6a77f446eca899afbb32bd1dc7.html


# Edps/Psych/Stat 587     
# Fall 2016
# Carolyn J. Anderson
#
# Version 1 of R code to go with notes on Models for Clustered Data
#
# Fitting random intercept model using Nels10 data
#
#   Install and load lme4 and lattice
library(lme4)
library(lattice)

# Change directory to where the data live and then
nels10 <- read.table(file="NELS10.txt", header=TRUE)

head(nels10)
tail(nels10)

# So can just use variable name without also using data table name
attach(nels10)


# simple descriptive statistics
summary(math)
mean(math)
var(math)


# Plain old linear regression (no predictors)
linreg <- lm(math ~ 1, data=nels10)
linreg                           # basic results
anova(linreg)                    # to get ANOVA table
summary(linreg)                  # summary of stuff
plot(linreg, las=1)              # plots fitted by resdiuals

#
# ANOVA using schools (fixed effects)...this is REML
#
id <- as.factor(sch_id)
fixedANOVA <- lm(math ~ id, data=nels10)
summary(fixedANOVA)
anova(fixedANOVA)

#
# ANOVA using schools (fixed effects)...this is MLE (same or different?)
#
id <- as.factor(sch_id)
fixedANOVA2 <- lm(math ~ id, data=nels10, REML=FALSE)
summary(fixedANOVA2)
anova(fixedANOVA2)

#
# This fits random intercept model using lmer using REML, which is default
#  (random effects ANOVA)
ranintREML <- lmer(math ~ 1 + (1 | sch_id), data=nels10, REML=TRUE)
summary(ranintREML)

#
# This fits random intercept model using lmer using MLE 
# (random effects ANOVA using MLE)
ranintMLE <- lmer(math ~ 1 + (1 | sch_id), data=nels10, REML=FALSE)
summary(ranintMLE)

icc <- (30.54)/(30.54 + 72.24)
icc


#
# Simple linear (normal) regression
#
multreg <- lm(math ~ 1 + homework, data=nels10)
summary(multreg)
anova(multreg)
fitmath <- fitted(multreg)
plot(homework, fitmath, type='l', main='Linear Regression of Math on Time Spent Doing Homework',
     xlab='Time Spent Doing Homework', ylab='Fitted Math Scores from Linear Regression')


#
#  This basically fits a separate regression line for each school (id)
#
mathbysch <- lm(formula = math ~ 1*id + homework * id, data = nels10)
summary(mathbysch)
fschmath <- fitted(mathbysch)

# Try 1, but don't want lines from different schools connected
plot(homework, fschmath, type='l', main='Try1: Separate Linear Regression for Each School',
                                   xlab='Time Spent Doing Homework',
                                   ylab='Fitted Math Scores'                                    )

# Try 2 works much better (it's more work but figure is correct)  
school.list <- unique(id)
length(school.list)


coef <- coefficients(mathbysch)
intercepts <- c(0, coef[3:11])
slopes <- c(0, coef[12:20])


plot(data = nels10, math~homework,type = 'n', 
     ylim = c(min(math),max(math)),
     xlim = c(min(homework),max(homework)),
     cex.main = .75,
     xlab = 'Time Spent Doing Homework', ylab = "Math Scores",
     main = "Separate Regression for Each School")

for(i in 1:length(school.list)){
  abline(a = (coef[1] + intercepts[i]), b = (coef[2]+slopes[i]), col = 'blue')
  par<- par(new=F)
}


#
# Some graphs/plots
#
# Random intercept and Slope  HLM 
#  ---- this is analogous to fitting regression to each school but the school effects are
#  random.  The black line is the average school and the blue ones are the conditional
#  ones (i.e., they include school specific estimates of intercept & slope)
#
#
model2 <- lmer(math ~ homework + ( homework | id), data=nels10, REML=FALSE)
summary(model2)

school.list <- unique(id)
length(school.list)

fixed.effects <- fixef(model2)
rand.effects <- ranef(model2)
params.student <- cbind((rand.effects$id[1]+fix[1]),(rand.effects$id[2]+fix[2]))

plot(data = nels, math~homework,type = 'n', 
     ylim = c(min(math),max(math)),
     xlim = c(min(homework),max(homework)),
     cex.main = .75,
     xlab = 'Time Spent Doing Homework', ylab = "Math Scores",
     main = "Estimated Conditional Models from Random Intercepts & Slope Model")

abline(a=fixed.effects[1], b= fixed.effects[2], col='black', lwd=2)

for(i in 1:length(school.list)){
  abline(a = params.student[i,1], b = params.student[i,2],col = 'blue')
  par<- par(new=F)
}