# edps 587
# spring 2020
# c.j.anderson
#
# Riesby depression data from Hedeker: goes with 1st lecture on
#  longitudinal data
#
#

library(lme4)
library(lmerTest)
library(texreg)
library(optimx)
library(lattice)
library(ggplot2)
library(ellipse)    # for plot of correlations matrix
library(reshape2)   # I used this to change from long to wide format

source("D:\\Dropbox\\edps587\\lectures\\9 longitudinal\\All_functions.txt")

hd <- read.table("D:\\Dropbox\\edps587\\lectures\\9 longitudinal\\riesby_data_time_invarient_for_r.txt", header=TRUE)

# Remove missing --- not ideal but for lecture....
hd <- na.omit(hd)

#hd$hamd <- as.numeric(hd$hamd)
			   
# Add integer school id called "index"
id <- unique(hd$id)
index <- seq(1:length(id))
to.add <- cbind(id,index)
hd <- merge(hd,to.add,by=c("id"))
n <- length(index)

# Panel plot with join
xyplot(hamd ~ week | id, 
       data=hd, 
	   col.line="blue", 
	   type=c("p","l"),
	   main="Plots of Hamlition Index by Week: Join Points")
	   
	   
# Panel plot with linear regression
xyplot(hamd ~ week | id, 
       data=hd, 
	   col.line="red", 
	   type=c("p","r"),
	   main="Plots of Hamlition Index by Week: Linear Regression")

# Panel plot with spline
xyplot(hamd ~ week | id, 
       data=hd, 
	   col.line=c("red"), 
	   lwd=2,
	   type=c("p","spline"),
	   main="Plots of Hamlition Index by Week: Spline")

# Panel plot with spline and Linear
xyplot(hamd ~ week | id, 
       data=hd, 
	   col.line=c("red","black","blue"), 
	   lwd=2,
	   type=c("p","spline","r"),
	   main="Plots of Hamlition Index by Week: Spline & Linear Regression")



# Join points by subject
hd$id <- as.factor(hd$id)
week.hamd <- aggregate(hamd ~ week + id , data=hd, FUN="mean")
plot(hd$week, hd$hamd,type = 'n', 
     cex.main = 1.5,
     xlab = 'Week', 
     ylab = "Hamliton Depression Index",
     main = "Join points for Person"
     )
# --- put in proper order
	j <- order(hd$index)
for (j in 1:n) {
  sub <- hd[ which(hd$index==j),]
  lines(sub$week,sub$hamd,col=j)
}


# All linear regressions in one plot
plot(hd$week, hd$hamd,type = 'n', 
     cex.main = 1.5,
     xlab = 'Week', 
     ylab = "Hamliton Depression Index",
     main = "Separate Linear Regression for Person"
     )
# --- put in proper order
	j <- order(hd$index)
for (j in 1:n) {
  sub <- hd[ which(hd$index==j),]
  fitted <- fitted(lm(hamd~week,sub))
  lines(sub$week,fitted,col=j)
}


# All linear regressions with quadratic in one plot
plot(hd$week, hd$hamd,type = 'n', 
     cex.main = 1.5,
     xlab = 'Week', 
     ylab = "Hamliton Depression Index",
     main = "Separate Quadratic Regression for Person"
     )
# --- put in proper order
	j <- order(hd$index)
for (j in 1:n) {
  sub <- hd[ which(hd$index==j),]
  sub$wksq <- sub$week*sub$week
  fitted <- fitted(lm(hamd~week + wksq, sub))
  lines(sub$week,fitted,col=j)
}


#
# Data averages
#
week.hamd2 <- aggregate(hamd ~ week, data=hd,FUN="mean")
plot(week.hamd2$week,week.hamd2$hamd,type='b',
     lwd=2,
	 col='blue',
	 main="Overall Mean Hamliton by Week",
	 xlab="Week",
	 ylab="Hamlition Depression Index",
	 ylim=c(0,40))
	 
#
# Mean by week by type
#
week.hamd3 <- aggregate(hamd ~ week + endog, data=hd,FUN="mean")
plot(week.hamd3$week[0:6],week.hamd3$hamd[0:6],type='b',
     lwd=2,
	 col='blue',
	 main="Mean Hamliton by Week \n seperate lines for type of depression",
	 xlab="Week",
	 ylab="Hamlition Depression Index",
	 ylim=c(0,40))
lines(week.hamd3$week[7:12],week.hamd3$hamd[7:12],type='b',lwd=2,col='red')
legend("topleft",legend=c("Non-Endogenous","Endogenous"),col=c("red","blue"),
       title="Type of Depression",lty=c(1,1),lwd=2, cex=1.1)

#################################################################
# Rsq and metaRsq
###########################################s######################

N <- length(unique(hd$id))       # number of patients
ssmodel <- c(0)                  # object to hold model SS
sstotal <- c(0)                  # object to hold total SS
R2 <- matrix(99,nrow=N,ncol=2)   # create object to hold  Rsquares

for (i in 1:N){
     sub <- hd[ which(hd$index==i),]
     model0 <- lm(hamd ~ week, data=sub)
	 a <- anova(model0)
     ssmodel <- ssmodel + a[1,2] 
	 sstotal <- sstotal + sum(a[,2])
	 R2[i,1:2] <- c(i,summary(model0)$r.squared)
}

R2meta <- ssmodel/sstotal          # compute meta R2 for model 1            
R2.mod1 <- R2[1:N,2]               # hold R2 from first model

# Plot R2 by sample size
nj <- as.numeric(table(hd$id))

plot(nj,R2.mod1,
     xlab="Sample Size for Each Person",
 	 ylab=expression(R^2),
 	 main=expression(paste(R^2," Model 1:   ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + ", R[it])),
     cex=1.2,
	 ylim=c(0,1),
	 type='p',
	 col="blue")
x <- 1:6
y <- rep(R2meta,6)
lines(x,y,type='l',col="red")
text(4.3,.8,expression(paste(R^2,"meta = .75")),cex=1.2)


#
# R2 and meta R2:   MODEL 2
#
ssmodel <- c(0)                  # object to hold model SS
sstotal <- c(0)                  # object to hold total SS
R2b <- matrix(99,nrow=N,ncol=2)   # create object to hold  Rsquares

for (i in 1:N){
     sub <- hd[ which(hd$index==i),]
	 sub$wksq <- sub$week*sub$week
     model2 <- lm(hamd ~ week + wksq, data=sub)
	 a <- anova(model2)
     ssmodel <- ssmodel + a[1,2] + a[2,2]
	 sstotal <- sstotal + sum(a[,2])
	 R2b[i,1:2] <- c(i,summary(model2)$r.squared)
}

R2meta <- ssmodel/sstotal          # compute meta R2 for model 1            
R2.mod2 <- R2b[1:N,2]               # hold R2 from first model

# Plot R2 by sample size
nj <- as.numeric(table(hd$id))

plot(nj,R2.mod2,
     xlab="Sample Size for Each Person",
 	 ylab=expression(R^2),
 	 main=expression(paste(R^2," Model 1:   ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + ", beta[q,i], x[qit]^2, " + " ,R[it])),
     cex=1.2,
	 ylim=c(0,1),
	 type='p',
	 col="blue")
x <- 1:6
y <- rep(R2meta,6)
lines(x,y,type='l',col="red")
text(4.3,.89,expression(paste(R^2,"meta = .84")),cex=1.2)


#### comparison
plot(R2.mod1,R2.mod2,type="p", main="Improvement from Adding Quadratic Term",
     col="blue",
	 xlim=c(0,1),
	 ylim=c(0,1),
	 ylab=expression(paste(R^2," Model 1:   ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + ", beta[q,i], x[qit]^2, " + " ,R[it])),
	 xlab=expression(paste(R^2," Model 1:   ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + " ,R[it])),
)
x <- 0:1
y <- 0:1
lines(x,y,type="l",col="red")


#### comparison:  who is improving
R2mod1 <- as.data.frame(R2.mod1)
R2mod2 <- as.data.frame(R2.mod2)
Rsqs <- cbind(R2mod1,R2mod2)
Rsqs$index <- index
combined <- merge(hd,Rsqs,by="index")

nonRsq <- combined[which(combined$endog==0),]
endRsq <- combined[which(combined$endog==1),]

plot(nonRsq$R2.mod1,nonRsq$R2.mod2,type="p", main="Who Is Better Fit by Quadratic Term",
     col="blue", pch=21, cex=1.2,
	 xlim=c(0,1),
	 ylim=c(0,1),
	 ylab=expression(paste(R^2," Model 1:   ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + ", beta[q,i], x[qit]^2, " + " ,R[it])),
	 xlab=expression(paste(R^2," Model 1:   ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + " ,R[it])),
)
lines(endRsq$R2.mod1,endRsq$R2.mod2,type="p",col="black",pch=19)
x <- 0:1
y <- 0:1
lines(x,y,type="l",col="red")
legend(x="bottomright", legend=c("Non-Endogenous", "Endogenous"),
       col=c("blue", "black"), pch=c(21,19), cex=1.2)

#
# Compute mean squared residual from lm and fit regression to them
# 
# Looking at variance functions by looking at
#  (mean) squared residuals
#
hd$weeksq <- hd$week**2
ols <- lm(hamd ~ 1 + week + weeksq + endog,data=hd)
hd$res.sq <- residuals(ols)*residuals(ols)
mres <- aggregate(res.sq ~ week + endog,data=hd,FUN="mean")

par(mfrow=c(2,2))
end <- mres[which(mres$endog==1),]
nonend <- mres[which(mres$endog==0),]

plot(end$week,end$res.sq,type='l',col='red',
     xlim=c(0,5),
	 ylim=c(10,70),
	 xlab="Week",
	 ylab="Mean Squared Residuals",
	 main="Data")
lines(nonend$week,nonend$res.sq,type='l',col='blue')
legend("topleft",col=c("red","blue"),lty=1,
       legend=c("Endogenous","Non-Endogenous"))

lin.end <- lm(res.sq ~ week,data=end)
lin.non <- lm(res.sq ~ week,data=nonend)
plot(end$week,fitted(lin.end),type='l',col='red',
     xlim=c(0,5),
	 ylim=c(10,70),
	 xlab="Week",
	 ylab="Mean Squared Residuals",
	 main="Fit using Linear Regression")
lines(nonend$week,fitted(lin.non),type='l',col='blue')
legend("topleft",col=c("red","blue"),lty=1,
       legend=c("Endogenous","Non-Endogenous"))

	   
end$week2 <- end$week*end$week
quad.end <- lm(res.sq ~ week + week2,data=end)
nonend$week2 <- end$week2
quad.non <- lm(res.sq ~ week + week2,data=nonend)
plot(end$week,fitted(quad.end),type='l',col='red',
     xlim=c(0,5),
	 ylim=c(10,70),
	 xlab="Week",
	 ylab="Mean Squared Residuals",
	 main="Quadratic")
lines(nonend$week,fitted(quad.non),type='l',col='blue')
legend("topleft",col=c("red","blue"),lty=1,
       legend=c("Endogenous","Non-Endogenous"))

	   
end$week3 <- end$week*end$week*end$week
cub.end <- lm(res.sq ~ week + week2 + week3,data=end)
nonend$week3 <- end$week3
cub.non <- lm(res.sq ~ week + week2 + week3,data=nonend)
plot(end$week,fitted(cub.end),type='l',col='red',
     xlim=c(0,5),
	 ylim=c(10,70),
	 xlab="Week",
	 ylab="Mean Squared Residuals",
	 main="Cubic")
lines(nonend$week,fitted(cub.non),type='l',col='blue')
legend("topleft",col=c("red","blue"),lty=1,
       legend=c("Endogenous","Non-Endogenous"))

#################################
# Looking at serial correlation #
#################################
# -- change from long to wide
hd$wk <- as.factor(hd$week)
wide <- spread(data=hd,key=wk,value=hamd)
#-- since this is not working, I will use "brute force"
hd$week <- factor(hd$week)

levels(hd$week)[levels(hd$week)=="0"] <- "week0"
levels(hd$week)[levels(hd$week)=="1"] <- "week1"
levels(hd$week)[levels(hd$week)=="2"] <- "week2"
levels(hd$week)[levels(hd$week)=="3"] <- "week3"
levels(hd$week)[levels(hd$week)=="4"] <- "week4"
levels(hd$week)[levels(hd$week)=="5"] <- "week5"

wide <- dcast(hd, id ~ week, value.var="hamd")

weeks <- data.frame(wide$week0,wide$week1,wide$week2,wide$week3,wide$week4,wide$week5)

ctab <- cor(weeks, use="pairwise.complete.obs")
round(ctab,digits=2)

# grey-scale plot
plotcorr(ctab)

# color plot
colorfun <- colorRamp(c("#CC0000","white","#3366CC"), space="Lab")
plotcorr(ctab, col=rgb(colorfun((ctab+1)/2), maxColorValue=255))


#############################################
# The Actual modeling of data               #
#############################################

###########################################################
# lmer Models: models with random week^2 do not converge  #
#              but important ones I think are OK.         # 
###########################################################
#
# Null/empty Model
#
summary(model.null <- lmer(hamd ~ 1 + (1 | id),
                        data=hd,REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead")))
   
bic.hlm(model.null,cluster.id=hd$id)

# just week: model0

hd$id <- as.factor(hd$id)
hd$week <- as.numeric(hd$week)

hd$weeksq <- hd$week*hd$week
summary(model.0 <- lmer(hamd ~ 1 + week + weeksq + endog  + (1 | id),
                        data=hd, REML=FALSE,
						control = lmerControl(optimizer ="Nelder_Mead")))
   
#
# Prelimnary model
#
summary(model.pre <- lmer(hamd ~ 1 + week + weeksq  + endog  + endog*week 
                         +(1 + week + weeksq  | id), 
						 data=hd,
						 REML=FALSE,
						 control = lmerControl(optimizer ="Nelder_Mead")))
bic.hlm(model.pre,cluster.id=hd$id)

texreg(list(model.null, model.0, model.pre),
       custom.model.names=c("Null","Some Fixed","Preliminary"), 
	   single.row=TRUE) 
 
#
# Prelimnary model w/ cubic week -- converged when re-scale weeksq
#
hd$week3 <- hd$weeksq * hd$week
hd$xweeksq <- hd$weeksq/10

summary(model.preCubic <- lmer(hamd ~ 1 + week + xweeksq + week3 + endog  + endog*week 
                         +(1 + week + xweeksq  | id), 
						 data=hd,
						 REML=FALSE,
						 control = lmerControl(optimizer ="Nelder_Mead")))
bic.hlm(model.preCubic,cluster.id=hd$id)

#
# Test whether need week2 as Random effect in preliminary model
#  ~ p 74 of notes
#
summary(model.reduced1 <- lmer(hamd ~ 1 + week + weeksq  + endog  + endog*week 
                         +(1 + week  | id), 
						 data=hd,
						 REML=FALSE,
						 control = lmerControl(optimizer ="Nelder_Mead")))
bic.hlm(model.reduced1,cluster.id=hd$id)

# Basically code from computer lab3 to give nice table
test.table <- anova(model.reduced1,model.pre)
p1  <- test.table[2,8]                                 
df0 <- test.table[2,7] - 1
p0  <- pchisq(test.table[2,6],df0,lower.tail=FALSE)    
pvalue <- .5*(p1+p0)
test.out <- matrix(c(test.table[2,6],test.table[2,7],df0,p1,p0,pvalue),nrow=1)
test.out <- as.data.frame(test.out)
names(test.out) <- c("LR statistic","df1","df0","p1","p0","pvalue")
round(test.out,digits=3)



# Drop endog*week from model.pre   & final for now
summary(model.reduced2 <- lmer(hamd ~ 1 + week + weeksq  + endog  
                         +(1 + week + weeksq  | id), 
						 data=hd,
						 REML=FALSE,
						 control = lmerControl(optimizer ="Nelder_Mead")))
bic.hlm(model.reduced2,cluster.id=hd$id)


# Drop endog 
summary(model.reduced3 <- lmer(hamd ~ 1 + week + weeksq   
                         +(1 + week + weeksq  | id), 
						 data=hd,
						 REML=FALSE,
						 control = lmerControl(optimizer ="Nelder_Mead")))
bic.hlm(model.reduced3,cluster.id=hd$id)
anova(model.reduced3,model.reduced2)
 

texreg(list(model.pre, model.reduced2),
       custom.model.names=c("Preliminary","No Interaction"), 
	   single.row=TRUE) 
 
############################################3
# Plots of Fitted Values                   #
############################################ 
hd$fit2 <- fitted(model.reduced2)

mean.fit2 <- aggregate(fit2 ~ week + endog,data=hd,FUN="mean")

#
#  Population average model
#
fit2.endog <-  mean.fit2[which(mean.fit2$endog==1),]
plot(fit2.endog$week,fit2.endog$fit2,type="b",pch=19,col="blue",cex=1.3,
         main="Mean Fitted Diagnosis by Week",
		 xlab="Week",
		 ylab="Fitted Mean Depression",
		 ylim=c(0,35))
fit2.nondog <- mean.fit2[which(mean.fit2$endog==0),]
lines(fit2.nondog$week,fit2.nondog$fit2,type="b",pch=19,col="orange",cex=1.3)
legend("topleft",col=c("blue","orange"),lty=1,cex=1.3,
       legend=c("Endogenous","Non-Endogenous"))
	   
#
# Conditional regressions
#
# First get them & make data frames
U <- ranef(model.reduced2)
U <- as.data.frame(U)

U0j <- U[which(U$term=="(Intercept)"),]
names(U0j) <- c("grpvar","term","id","U0j","sdU0j")

U1j <- U[which(U$term=="week"),]
names(U1j) <- c("grpvar","term","id","U1j","sdU1j")

U2j <- U[which(U$term=="weeksq"),]
names(U2j) <- c("grpvar","term","id","U2j","sdU2j")

# Put them in hd so have wide-format 
Uwide <- merge(U0j,U1j,by="id")
Uwide <- merge(Uwide,U2j,by="id")
hd <- merge(hd,Uwide,by="id")


# Compute person specific regressions
g2 <- summary(model.reduced2)$coefficients

hd$person.fit <- g2[1] + g2[2]*hd$week + g2[3]*hd$weeksq + g2[4]*hd$endog  + hd$U0j + hd$U1j*hd$week + hd$U2j*hd$weeksq

# Plot by person
sorted <- hd[order(hd$id),] 
plot(hd$week,hd$person.fit,type="n",
     main="Person Specific Regressions (Conditional on Us)",
	 xlab="Week",
	 ylab="Fitted Depression Index",
	 cex=1.5,
	 ylim=c(0,40))
for (i in hd$id)  {
 sub <- sorted[which(sorted$id==i),]
 lines(sub$week,sub$person.fit,col=i)
 }