#  Edps/Psych/Stat  587
#  Spring 2019
#  Carolyn J Anderson
# 
#  All analyses in lecture on Random Intercept Models in R   (High School & Beyond Ones)
#
#  
#  I relied heavliy on Introduction to Multilevel Modeling in R by Grover, Guillermono and Hudson (2015)
#   downloaded from https://rstudio-pubs-static.s3.amazonaws.com/78961_fe5b5c6a77f446eca899afbb32bd1dc7.html
#
#  For p-value and different degrees of freedom using lmerTest package see 
#       http://mindingthebrain.blogspot.com/2014/02/three-ways-to-get-parameter-specific-p.html
#  Below I use lmerTest, which give you Satterthwaited df and p-values for HLM (these are same 
#    as given by SAS)
#
#
# Load Packages:
#          lme4
#          lattice
#          lmrTest 
#          texreg --- for nicer output

#
# Set working directory directory
#
setwd('D:\\Dropbox\\edps587\\lectures\\3 randomintercept')

############################################################################
#  Data set up: merge hsb1 and hsb2                                        #
############################################################################
#
# Read in student level data
#
hsb1 <- read.table(file="HSB1data.txt", header=TRUE)

# Top & bottom of the file
#
head(hsb1)
tail(hsb1)


#
# Read in school level data
#
hsb2 <- read.table(file="HSB2data.txt", header=TRUE)

# Top & bottom of the file 
#
head(hsb2)
tail(hsb2)

#                                                               #
# Make long matrix                                              #
#                                                               #   
#  Already Sorted by school id                                  #
#                                                               #
# Merging student & school level files by school id ('id')      #
#                                                               #
hsb <- merge(hsb1,hsb2, by=c('id'))
head(hsb)


#
# Attach to make variables easier to name 
#

attach(hsb)



#########################################################################
# Some information about the data    (e.g., descriptive statistics      #
#########################################################################

#
# Number of schools
nschools <- nrow(hsb2)
nschools

#
# List of schools
school.list <- hsb2$id

#
# Overall summary statistics
#
summary(mathach)
sd(mathach)
mean(mathach)

summary(ses)
sd(ses)

#
# Summary descriptive statistics by School
#
mmean <- aggregate(mathach~id, data=hsb, FUN="mean")
msd <- aggregate(mathach~id, data=hsb, sd)
mmin <- aggregate(mathach~id, data=hsb, min)
mmax <- aggregate(mathach~id, data=hsb, max)
math.stat <- cbind(mmean,msd[,2], mmin[,2], mmax[,2])

names(math.stat) <- c('Mean Math', 'SD math', 'Min', 'Max')
math.stat


####################################################
# Some graphing                                    #
####################################################

#
# Plot of mean ses by id --> variability between schools
#


plot(id, meanses, type='p', pch=21,
     main='HSB: Group Mean SES by School ID',
     xlab='School ID',
     ylab='Group Mean SES')

#
# Plot of student ses by id --> variability within schools
#


plot(id, ses, type='p', pch=21,
     main='HSB: Student SES by School ID',
     xlab='School ID',
     ylab='Group Mean SES')

#
#  Just one school
#

school.1224 <- hsb[ which(id==1224), ]


plot(school.1224$ses, school.1224$mathach, type='p', pch=21,
     main="HSB: Math by SES for School 1224",
     xlab='Student SES',
     ylab='Math Achievement')
reg.1224 <- lm(school.1224$mathach~school.1224$ses, data=school.1224)
abline(reg.1224)


#
# Spanel plot of a random sample of 20
#

# Will need this for graph 

cses <- ses - meanses
hsb <- cbind(hsb,cses)



groups <- unique(hsb$id)[sample(1:160,20)]

subset <- hsb[hsb$id%in%groups,]

xyplot(mathach ~ ses | id, data=subset, col.line='black', 
                      type=c('p'),
                      main='Varability in Math ~ SES relationship')
  



#
# panel plot of a random sample of 20 with regression lines
#

xyplot(mathach ~ ses | id, data=subset, col.line='black', 
                      type=c('p','r'),
                      main='Varability in Math ~ SES relationship')
  

#
#  Panel Now with school centered ses
#
 

xyplot(mathach ~ cses | id, data=subset, col.line='black', 
                      type=c('p','r'),
                      main='Math Achievement by School Centered SES',
                      xlab='School Centered SES',
                      ylab='Math Achievement Test Scores')
  


# 
# All regression lines in one plot
#

schid <- as.factor(id)

mathbysch <- lm(formula = mathach ~ 1*schid + ses*schid, data = hsb)
summary(mathbysch)

fschmath <- fitted(mathbysch)
school.list <- unique(schid)
length(school.list)


coef <- coefficients(mathbysch)
intercepts <- c(0, coef[3:161])
slopes <- c(0, coef[162:320])


# Plot all regressions in one plot
plot(data = hsb, mathach~ses,type = 'n', 
     ylim = c(-10, 30),
     xlim = c(-3, 3),
     cex.main = 1.5,
     xlab = 'SES', 
     ylab = "Math Scores",
     main = "Separate Regression for Each School "
     )

for(i in 1:length(school.list)){
  abline(a = (coef[1] + intercepts[i]), b = (coef[2]+slopes[i]), col = 'blue')
  par<- par(new=F)
}
abline(v=c(0), type='l', lwd=2, color='black')





###########################################################   
# Create and add mean meanminority & meanfemale to data   #
###########################################################

# The data file already has mean SES but need mean center ses


meanminority  <- aggregate(minority ~ id , data=hsb, FUN="mean")
names(meanminority) <- c('id', 'meanminority')

hsb <- merge(hsb,meanminority, by =c('id'))



meanfemale  <- aggregate(female ~ id , data=hsb, FUN="mean")
names(meanfemale) <- c('id', 'meanfemale')

hsb <- merge(hsb,meanfemale, by =c('id'))


# If you want to save data so that you don't got through steps
#   to create is again, you can write the results to a file.
#   There are other file formats that you can use (e.g., csv)
#
write.table(hsb, 'hsb.txt', row.names=F, na='.')



######################################################################
# Model fitting                                                      
#
# Note that "Std.Dev." in the Random effects table produced by lmer 
#   is NOT the standard error of the variance estmates.  The latter 
#   is not computed in lmer but it is computed in SAS.  These is some 
#   controversy over whether se of variance estimates should even be
#   computed. We do not use them, so not problem.
######################################################################
#
#  Attach file that has all variables
#

attach(hsb)


#
# Random inercept / empty HLM / Null model / RANDOM effects ANOVA
#
#


model0 <- lmer(mathach ~ 1 + (1 | id), data=hsb, REML=FALSE)

summary(model0)


# Cut and paste
icc <- 8.553/(8.553+39.148)
icc

# Or extract the information to compute

vars <- as.data.frame(VarCorr(model0))[4]
total <- sum(vars)
tau00 <- vars[1,1]
icc <- tau00/total
icc

# Use the following function that I wrote:

icc.lmer <- function(modl){
  vars <- as.data.frame(VarCorr(modl))[4]
  total <- sum(vars)
  tau00 <- vars[1,1]
  icc <- tau00/total
  return(icc)
  }

# example of how to use this
icc.lmer(model0)


#
#  Random intercept and fixed slope for ses
#

#
#  To get fixed effects df (Satterhwaite)  p-value
#
require(lmerTest)

model1 <- lmer(mathach ~ 1 + ses + (1 | id), data=hsb, REML=FALSE)
summary(model1)


icc.lmer(model1)

####################
## Nifty Package   #
####################
library(texreg)

# Nice display
screenreg(list(model0,model1))

mytable <- texreg(list(model0, model2), label = "tab:4",
      caption = "Bolded coefficients, custom notes, three digits.",
      float.pos = "h", bold = 0.05, stars = 0,  custom.note =
      "Coefficients with $p < 0.05$ in \\textbf{bold}.",
      digits = 3, leading.zero = FALSE, omit.coef = "Inter")



	  
	  
	  
#
#  Random intercept and fixed slope & meanses for ses
#

model2 <- lmer(mathach ~ 1 + ses + meanses + (1 | id), data=hsb, REML=FALSE)

summary(model2)

#
#  Need to center ses (if haven't already done above for graph)
#

cses <- ses - meanses

model3 <- lmer(mathach ~ 1 + cses  + (1 | id), data=hsb, REML=FALSE)

summary(model3)

icc.lmer(model3)

# Plot population average regression

intercept <- fixef(model3)[1]
slope <- fixef(model3)[2]

intercept.j <- ranef(model3)
student.int <- intercept.j$id[1]


plot(mathach~cses,type = 'n', data = hsb, 
     ylim = c(-10, 30),
     xlim = c(-3, 3),
     cex.main = 1.5,
     xlab = 'School Centered SES',  
     ylab = "Fitted Math Achievement Scores",
     main = "Population Average Regression "
     )
# Population average line
for(i in min(cses):max(cses)){
   abline(a=intercept, b=slope, color='black',lwd=4)
}

# Population and School Specific

plot(mathach~cses,type = 'n', data = hsb, 
     ylim = c(-10, 30),
     xlim = c(-3, 3),
     cex.main = 1.5,
     xlab = 'School Centered SES',  
     ylab = "Fitted Math Achievement Scores",
     main = "Population & School Specific Regressions "
     )
# draw in scho0l specific line
for (i in 1:length(school.list)){
   abline(a=(intercept+student.int[i,]), b=slope, type='l', col='lightpink')
  }

# Population average line
for(i in min(cses):max(cses)){
   abline(a=intercept, b=slope, color='black',lwd=4)
}

#
#  Need to center ses & add back in mean ses
#


model4 <- lmer(mathach ~ 1 + cses + meanses + (1 | id), data=hsb, REML=FALSE)

summary(model4)

icc.lmer(model4)


#
#  lots of level 1 variables
#


model5 <- lmer(mathach ~ 1 + cses + female + minority + meanses + (1 | id), data=hsb, REML=FALSE)

summary(model5)

icc.lmer(model5)

#
#  lots of level 1  & 2 variables
#


model6 <- lmer(mathach ~ 1 + cses + female + minority + meanses + himinty  + pracad 
                         + disclim + sector + size +(1 | id), data=hsb, REML=FALSE)

summary(model6)


screenreg(list(model0,model5,model6,model7))

#
#  even more  2 variables
#

model7 <- lmer(mathach ~ 1 + cses + female + minority + meanses + meanminority + meanfemale 
                         + himinty  + pracad + disclim + sector + size +(1 | id), data=hsb, REML=FALSE)

summary(model7)

# Quick comparison
screenreg(list(model0,model5,model6,model7))