# edps 587 # spring 2020 # c.j.anderson # # Riesby depression data from Hedeker: goes with 1st lecture on # longitudinal data # # library(lme4) library(lmerTest) library(texreg) library(optimx) library(lattice) library(ggplot2) library(ellipse) # for plot of correlations matrix library(reshape2) # I used this to change from long to wide format source("D:\\Dropbox\\edps587\\lectures\\9 longitudinal\\All_functions.txt") hd <- read.table("D:\\Dropbox\\edps587\\lectures\\9 longitudinal\\riesby_data_time_invarient_for_r.txt", header=TRUE) # Remove missing --- not ideal but for lecture.... hd <- na.omit(hd) #hd$hamd <- as.numeric(hd$hamd) # Add integer school id called "index" id <- unique(hd$id) index <- seq(1:length(id)) to.add <- cbind(id,index) hd <- merge(hd,to.add,by=c("id")) n <- length(index) # Panel plot with join xyplot(hamd ~ week | id, data=hd, col.line="blue", type=c("p","l"), main="Plots of Hamlition Index by Week: Join Points") # Panel plot with linear regression xyplot(hamd ~ week | id, data=hd, col.line="red", type=c("p","r"), main="Plots of Hamlition Index by Week: Linear Regression") # Panel plot with spline xyplot(hamd ~ week | id, data=hd, col.line=c("red"), lwd=2, type=c("p","spline"), main="Plots of Hamlition Index by Week: Spline") # Panel plot with spline and Linear xyplot(hamd ~ week | id, data=hd, col.line=c("red","black","blue"), lwd=2, type=c("p","spline","r"), main="Plots of Hamlition Index by Week: Spline & Linear Regression") # Join points by subject hd$id <- as.factor(hd$id) week.hamd <- aggregate(hamd ~ week + id , data=hd, FUN="mean") plot(hd$week, hd$hamd,type = 'n', cex.main = 1.5, xlab = 'Week', ylab = "Hamliton Depression Index", main = "Join points for Person" ) # --- put in proper order j <- order(hd$index) for (j in 1:n) { sub <- hd[ which(hd$index==j),] lines(sub$week,sub$hamd,col=j) } # All linear regressions in one plot plot(hd$week, hd$hamd,type = 'n', cex.main = 1.5, xlab = 'Week', ylab = "Hamliton Depression Index", main = "Separate Linear Regression for Person" ) # --- put in proper order j <- order(hd$index) for (j in 1:n) { sub <- hd[ which(hd$index==j),] fitted <- fitted(lm(hamd~week,sub)) lines(sub$week,fitted,col=j) } # All linear regressions with quadratic in one plot plot(hd$week, hd$hamd,type = 'n', cex.main = 1.5, xlab = 'Week', ylab = "Hamliton Depression Index", main = "Separate Quadratic Regression for Person" ) # --- put in proper order j <- order(hd$index) for (j in 1:n) { sub <- hd[ which(hd$index==j),] sub$wksq <- sub$week*sub$week fitted <- fitted(lm(hamd~week + wksq, sub)) lines(sub$week,fitted,col=j) } # # Data averages # week.hamd2 <- aggregate(hamd ~ week, data=hd,FUN="mean") plot(week.hamd2$week,week.hamd2$hamd,type='b', lwd=2, col='blue', main="Overall Mean Hamliton by Week", xlab="Week", ylab="Hamlition Depression Index", ylim=c(0,40)) # # Mean by week by type # week.hamd3 <- aggregate(hamd ~ week + endog, data=hd,FUN="mean") plot(week.hamd3$week[0:6],week.hamd3$hamd[0:6],type='b', lwd=2, col='blue', main="Mean Hamliton by Week \n seperate lines for type of depression", xlab="Week", ylab="Hamlition Depression Index", ylim=c(0,40)) lines(week.hamd3$week[7:12],week.hamd3$hamd[7:12],type='b',lwd=2,col='red') legend("topleft",legend=c("Non-Endogenous","Endogenous"),col=c("red","blue"), title="Type of Depression",lty=c(1,1),lwd=2, cex=1.1) ################################################################# # Rsq and metaRsq ###########################################s###################### N <- length(unique(hd$id)) # number of patients ssmodel <- c(0) # object to hold model SS sstotal <- c(0) # object to hold total SS R2 <- matrix(99,nrow=N,ncol=2) # create object to hold Rsquares for (i in 1:N){ sub <- hd[ which(hd$index==i),] model0 <- lm(hamd ~ week, data=sub) a <- anova(model0) ssmodel <- ssmodel + a[1,2] sstotal <- sstotal + sum(a[,2]) R2[i,1:2] <- c(i,summary(model0)$r.squared) } R2meta <- ssmodel/sstotal # compute meta R2 for model 1 R2.mod1 <- R2[1:N,2] # hold R2 from first model # Plot R2 by sample size nj <- as.numeric(table(hd$id)) plot(nj,R2.mod1, xlab="Sample Size for Each Person", ylab=expression(R^2), main=expression(paste(R^2," Model 1: ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + ", R[it])), cex=1.2, ylim=c(0,1), type='p', col="blue") x <- 1:6 y <- rep(R2meta,6) lines(x,y,type='l',col="red") text(4.3,.8,expression(paste(R^2,"meta = .75")),cex=1.2) # # R2 and meta R2: MODEL 2 # ssmodel <- c(0) # object to hold model SS sstotal <- c(0) # object to hold total SS R2b <- matrix(99,nrow=N,ncol=2) # create object to hold Rsquares for (i in 1:N){ sub <- hd[ which(hd$index==i),] sub$wksq <- sub$week*sub$week model2 <- lm(hamd ~ week + wksq, data=sub) a <- anova(model2) ssmodel <- ssmodel + a[1,2] + a[2,2] sstotal <- sstotal + sum(a[,2]) R2b[i,1:2] <- c(i,summary(model2)$r.squared) } R2meta <- ssmodel/sstotal # compute meta R2 for model 1 R2.mod2 <- R2b[1:N,2] # hold R2 from first model # Plot R2 by sample size nj <- as.numeric(table(hd$id)) plot(nj,R2.mod2, xlab="Sample Size for Each Person", ylab=expression(R^2), main=expression(paste(R^2," Model 1: ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + ", beta[q,i], x[qit]^2, " + " ,R[it])), cex=1.2, ylim=c(0,1), type='p', col="blue") x <- 1:6 y <- rep(R2meta,6) lines(x,y,type='l',col="red") text(4.3,.89,expression(paste(R^2,"meta = .84")),cex=1.2) #### comparison plot(R2.mod1,R2.mod2,type="p", main="Improvement from Adding Quadratic Term", col="blue", xlim=c(0,1), ylim=c(0,1), ylab=expression(paste(R^2," Model 1: ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + ", beta[q,i], x[qit]^2, " + " ,R[it])), xlab=expression(paste(R^2," Model 1: ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + " ,R[it])), ) x <- 0:1 y <- 0:1 lines(x,y,type="l",col="red") #### comparison: who is improving R2mod1 <- as.data.frame(R2.mod1) R2mod2 <- as.data.frame(R2.mod2) Rsqs <- cbind(R2mod1,R2mod2) Rsqs$index <- index combined <- merge(hd,Rsqs,by="index") nonRsq <- combined[which(combined$endog==0),] endRsq <- combined[which(combined$endog==1),] plot(nonRsq$R2.mod1,nonRsq$R2.mod2,type="p", main="Who Is Better Fit by Quadratic Term", col="blue", pch=21, cex=1.2, xlim=c(0,1), ylim=c(0,1), ylab=expression(paste(R^2," Model 1: ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + ", beta[q,i], x[qit]^2, " + " ,R[it])), xlab=expression(paste(R^2," Model 1: ",Y[it], " = ", beta[oi], " + ", beta[li], x[lit], " + " ,R[it])), ) lines(endRsq$R2.mod1,endRsq$R2.mod2,type="p",col="black",pch=19) x <- 0:1 y <- 0:1 lines(x,y,type="l",col="red") legend(x="bottomright", legend=c("Non-Endogenous", "Endogenous"), col=c("blue", "black"), pch=c(21,19), cex=1.2) # # Compute mean squared residual from lm and fit regression to them # # Looking at variance functions by looking at # (mean) squared residuals # hd$weeksq <- hd$week**2 ols <- lm(hamd ~ 1 + week + weeksq + endog,data=hd) hd$res.sq <- residuals(ols)*residuals(ols) mres <- aggregate(res.sq ~ week + endog,data=hd,FUN="mean") par(mfrow=c(2,2)) end <- mres[which(mres$endog==1),] nonend <- mres[which(mres$endog==0),] plot(end$week,end$res.sq,type='l',col='red', xlim=c(0,5), ylim=c(10,70), xlab="Week", ylab="Mean Squared Residuals", main="Data") lines(nonend$week,nonend$res.sq,type='l',col='blue') legend("topleft",col=c("red","blue"),lty=1, legend=c("Endogenous","Non-Endogenous")) lin.end <- lm(res.sq ~ week,data=end) lin.non <- lm(res.sq ~ week,data=nonend) plot(end$week,fitted(lin.end),type='l',col='red', xlim=c(0,5), ylim=c(10,70), xlab="Week", ylab="Mean Squared Residuals", main="Fit using Linear Regression") lines(nonend$week,fitted(lin.non),type='l',col='blue') legend("topleft",col=c("red","blue"),lty=1, legend=c("Endogenous","Non-Endogenous")) end$week2 <- end$week*end$week quad.end <- lm(res.sq ~ week + week2,data=end) nonend$week2 <- end$week2 quad.non <- lm(res.sq ~ week + week2,data=nonend) plot(end$week,fitted(quad.end),type='l',col='red', xlim=c(0,5), ylim=c(10,70), xlab="Week", ylab="Mean Squared Residuals", main="Quadratic") lines(nonend$week,fitted(quad.non),type='l',col='blue') legend("topleft",col=c("red","blue"),lty=1, legend=c("Endogenous","Non-Endogenous")) end$week3 <- end$week*end$week*end$week cub.end <- lm(res.sq ~ week + week2 + week3,data=end) nonend$week3 <- end$week3 cub.non <- lm(res.sq ~ week + week2 + week3,data=nonend) plot(end$week,fitted(cub.end),type='l',col='red', xlim=c(0,5), ylim=c(10,70), xlab="Week", ylab="Mean Squared Residuals", main="Cubic") lines(nonend$week,fitted(cub.non),type='l',col='blue') legend("topleft",col=c("red","blue"),lty=1, legend=c("Endogenous","Non-Endogenous")) ################################# # Looking at serial correlation # ################################# # -- change from long to wide hd$wk <- as.factor(hd$week) wide <- spread(data=hd,key=wk,value=hamd) #-- since this is not working, I will use "brute force" hd$week <- factor(hd$week) levels(hd$week)[levels(hd$week)=="0"] <- "week0" levels(hd$week)[levels(hd$week)=="1"] <- "week1" levels(hd$week)[levels(hd$week)=="2"] <- "week2" levels(hd$week)[levels(hd$week)=="3"] <- "week3" levels(hd$week)[levels(hd$week)=="4"] <- "week4" levels(hd$week)[levels(hd$week)=="5"] <- "week5" wide <- dcast(hd, id ~ week, value.var="hamd") weeks <- data.frame(wide$week0,wide$week1,wide$week2,wide$week3,wide$week4,wide$week5) ctab <- cor(weeks, use="pairwise.complete.obs") round(ctab,digits=2) # grey-scale plot plotcorr(ctab) # color plot colorfun <- colorRamp(c("#CC0000","white","#3366CC"), space="Lab") plotcorr(ctab, col=rgb(colorfun((ctab+1)/2), maxColorValue=255)) ############################################# # The Actual modeling of data # ############################################# ########################################################### # lmer Models: models with random week^2 do not converge # # but important ones I think are OK. # ########################################################### # # Null/empty Model # summary(model.null <- lmer(hamd ~ 1 + (1 | id), data=hd,REML=FALSE,control = lmerControl(optimizer ="Nelder_Mead"))) bic.hlm(model.null,cluster.id=hd$id) # just week: model0 hd$id <- as.factor(hd$id) hd$week <- as.numeric(hd$week) hd$weeksq <- hd$week*hd$week summary(model.0 <- lmer(hamd ~ 1 + week + weeksq + endog + (1 | id), data=hd, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) # # Prelimnary model # summary(model.pre <- lmer(hamd ~ 1 + week + weeksq + endog + endog*week +(1 + week + weeksq | id), data=hd, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) bic.hlm(model.pre,cluster.id=hd$id) texreg(list(model.null, model.0, model.pre), custom.model.names=c("Null","Some Fixed","Preliminary"), single.row=TRUE) # # Prelimnary model w/ cubic week -- converged when re-scale weeksq # hd$week3 <- hd$weeksq * hd$week hd$xweeksq <- hd$weeksq/10 summary(model.preCubic <- lmer(hamd ~ 1 + week + xweeksq + week3 + endog + endog*week +(1 + week + xweeksq | id), data=hd, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) bic.hlm(model.preCubic,cluster.id=hd$id) # # Test whether need week2 as Random effect in preliminary model # ~ p 74 of notes # summary(model.reduced1 <- lmer(hamd ~ 1 + week + weeksq + endog + endog*week +(1 + week | id), data=hd, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) bic.hlm(model.reduced1,cluster.id=hd$id) # Basically code from computer lab3 to give nice table test.table <- anova(model.reduced1,model.pre) p1 <- test.table[2,8] df0 <- test.table[2,7] - 1 p0 <- pchisq(test.table[2,6],df0,lower.tail=FALSE) pvalue <- .5*(p1+p0) test.out <- matrix(c(test.table[2,6],test.table[2,7],df0,p1,p0,pvalue),nrow=1) test.out <- as.data.frame(test.out) names(test.out) <- c("LR statistic","df1","df0","p1","p0","pvalue") round(test.out,digits=3) # Drop endog*week from model.pre & final for now summary(model.reduced2 <- lmer(hamd ~ 1 + week + weeksq + endog +(1 + week + weeksq | id), data=hd, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) bic.hlm(model.reduced2,cluster.id=hd$id) # Drop endog summary(model.reduced3 <- lmer(hamd ~ 1 + week + weeksq +(1 + week + weeksq | id), data=hd, REML=FALSE, control = lmerControl(optimizer ="Nelder_Mead"))) bic.hlm(model.reduced3,cluster.id=hd$id) anova(model.reduced3,model.reduced2) texreg(list(model.pre, model.reduced2), custom.model.names=c("Preliminary","No Interaction"), single.row=TRUE) ############################################3 # Plots of Fitted Values # ############################################ hd$fit2 <- fitted(model.reduced2) mean.fit2 <- aggregate(fit2 ~ week + endog,data=hd,FUN="mean") # # Population average model # fit2.endog <- mean.fit2[which(mean.fit2$endog==1),] plot(fit2.endog$week,fit2.endog$fit2,type="b",pch=19,col="blue",cex=1.3, main="Mean Fitted Diagnosis by Week", xlab="Week", ylab="Fitted Mean Depression", ylim=c(0,35)) fit2.nondog <- mean.fit2[which(mean.fit2$endog==0),] lines(fit2.nondog$week,fit2.nondog$fit2,type="b",pch=19,col="orange",cex=1.3) legend("topleft",col=c("blue","orange"),lty=1,cex=1.3, legend=c("Endogenous","Non-Endogenous")) # # Conditional regressions # # First get them & make data frames U <- ranef(model.reduced2) U <- as.data.frame(U) U0j <- U[which(U$term=="(Intercept)"),] names(U0j) <- c("grpvar","term","id","U0j","sdU0j") U1j <- U[which(U$term=="week"),] names(U1j) <- c("grpvar","term","id","U1j","sdU1j") U2j <- U[which(U$term=="weeksq"),] names(U2j) <- c("grpvar","term","id","U2j","sdU2j") # Put them in hd so have wide-format Uwide <- merge(U0j,U1j,by="id") Uwide <- merge(Uwide,U2j,by="id") hd <- merge(hd,Uwide,by="id") # Compute person specific regressions g2 <- summary(model.reduced2)$coefficients hd$person.fit <- g2[1] + g2[2]*hd$week + g2[3]*hd$weeksq + g2[4]*hd$endog + hd$U0j + hd$U1j*hd$week + hd$U2j*hd$weeksq # Plot by person sorted <- hd[order(hd$id),] plot(hd$week,hd$person.fit,type="n", main="Person Specific Regressions (Conditional on Us)", xlab="Week", ylab="Fitted Depression Index", cex=1.5, ylim=c(0,40)) for (i in hd$id) { sub <- sorted[which(sorted$id==i),] lines(sub$week,sub$person.fit,col=i) }