# edps 590BAY ---- post this one # Fall 2019 # C.J.Anderson # # # Illustration of simple linear regression...and a couple of predictors # rjags # run.jags # jagsUI (most verbose output) # library(coda) library(rjags) setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\7 Linear Regression") #setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 590BAY\\Lectures\\7 Linear Regression") #source("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\DBDA2E-utilities.txt") ano <- read.table("anorexia_data.txt",header=T) ano$change <- ano$weight2 - ano$weight1 # This is just to check which treatement is which ano$Rx <- ano$rx chk<-aggregate(change~Rx,data=ano, FUN="mean") chk table(ano$rx) # Create dummy codes ctrl vs others ano$altRx <- ifelse(ano$rx==2,0,1) # Mean for each level of altRx aggregate(change~altRx,data=ano,FUN="mean") # Ordinary least square regression ols.lm <- lm(change~altRx, data=ano) summary(ols.lm) # or just a two independent sample t-test t.test(change~altRx, data=ano) ############################################################################################ set.seed(234590) dataList <- list(y=ano$change, x=ano$altRx, N=length(ano$change), sdY = sd(ano$change) ) ModelLm1 = "model { ## Likelihood for (i in 1:N){ y[i] ~ dnorm(mu[i] , 1/sigma^2) mu[i] <- b0 + b1*x[i] yhat[i] <- b0 + b1*x[i] ## For posterior predictive analysis res[i] <- y[i] - mu[i] emp.new[i] ~ dnorm(mu[i],1/sigma^2) res.new[i] <- emp.new[i] - mu[i] } ## Priors b0 ~ dnorm(0 , 1/(10*sdY^2) ) b1 ~ dnorm(0 , 1/(10*sdY^2) ) sigma ~ dunif( 1E-3, 1E+30 ) ## Derived paramters fit <- sum(res[]) fit.new <- sum(res.new[]) } " writeLines(ModelLm1, con="ModelLm1.txt") b0Init = mean(ano$change) b1Init = 0 sigmaInit = sd(ano$change) initsList = list(b0=b0Init, b1=b1Init, sigma=sigmaInit ) jagsModelLm1 <- jags.model(file="ModelLm1.txt", # compiles and intializes model data=dataList, inits=initsList, n.chains=4, n.adapt=500) # adaptive phase for 500 iterations update (jagsModelLm1, n.iter=500) # gets samples from posterior with a # burn in of 500 iterations # contains samples from all chains with 500 iterations Samples <- coda.samples(jagsModelLm1, variable.names=c("b0","b1","sigma","fit","fit.new"), n.iter=4000) # output summary information summary(Samples) par(ask=TRUE) plot(Samples) autocorr.plot(Samples2auto.layout=TRUE) geweke.diag(Samples,frac1=0.1,frac2=0.5) effectiveSize(Samples) cumuplot(Samples,probs=c(.25,.50,.75),lwd=c(1,2),lty=c(2,1),col=c("blue","red")) gelman.plot(Samples) ###################################### ## Posterior predictive check ## ###################################### ## Extract fit and new.fit Samples <- coda.samples(jagsModelLm1, variable.names=c("b0","b1","sigma","fit","fit.new","yhat"), n.iter=4000) yhat1 <- Samples[[1]] # From chain 1 ypred1 <- as.matrix(yhat1[ ,6:77]) ypost1 <- matrix(ypred1,ncol=1) yhat2 <- Samples[[2]] # From chain 2 ypred2 <- as.matrix(yhat2[ ,6:77]) ypost2 <- matrix(ypred2,ncol=1) yhat3 <- Samples[[3]] # From chain 3 ypred3 <- as.matrix(yhat3[ ,6:77]) ypost3 <- matrix(ypred3,ncol=1) yhat4 <- Samples[[4]] # From chain 4 ypred4 <- as.matrix(yhat4[ ,6:77]) ypost4 <- matrix(ypred4,ncol=1) ypost <- cbind(ypost1,ypost2,ypost3,ypost4) par(mfrow=c(2,2)) hist(ano$change,breaks=20,main="Data") hist(ypost,breaks=20,main="Posterior Predictive") # Girls within treatement have the same yhat so we could just look # at one girl's posterior distribution s <- summary(Samples) a <- as.array(s) y1 <- Samples[[1]] ####### I will just look at 1 chain y11 <- as.matrix(y1) yhat.rx1 <- y1.72[,8] yhat.rx2 <- y1.72[,35] yhat.rx3 <- y1.72[,72] par(mfrow=c(2,2)) hist(yhat.rx1,main="Treatment 1",breaks=20) hist(yhat.rx2,main="Treatment 2",breaks=20) hist(yhat.rx3,main="Treatment 3",breaks=20) hist(ypost,breaks=20,main="All chains, all girls") girl.rx1 <- ano[which(ano$rx==1),] girl.rx2 <- ano[which(ano$rx==2),] girl.rx3 <- ano[which(ano$rx==3),] xmean <- mean(yhat.rx1) sd <- sd(yhat.rx1) x <- yhat.rx1 hist(girl.rx1$change,main="Treatement 1", freq=FALSE,ylim=c(0,.14),breaks=20, xlab="Change in Weight",ces.axis=1.5) curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2) xmean <- mean(yhat.rx2) sd <- sd(yhat.rx2) x <- yhat.rx2 hist(girl.rx2$change,main="Treatement 2", freq=FALSE,ylim=c(0,.14),breaks=20, xlab="Change in Weight",ces.axis=1.5) curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2) xmean <- mean(yhat.rx3) sd <- sd(yhat.rx3) x <- yhat.rx3 hist(girl.rx3$change,main="Treatement 3 \n (only 9 observations)", freq=FALSE,ylim=c(0,.14),breaks=20, xlab="Change in Weight",ces.axis=1.5) curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2) ## ############################################################################ ########## all treatments ################################################# # This is just to check which treatement is which ano$Rx <- ano$rx chk<-aggregate(change~Rx,data=ano, FUN="mean") chk table(ano$rx) # Create dummy codes for treatments ano$family <- ifelse(ano$rx==3,1,0) ano$cogn <- ifelse(ano$rx==1,1,0) # Mean for each level of dummies--chk aggregate(change~family,data=ano,FUN="mean") aggregate(change~cogn,data=ano,FUN="mean") ### Ordinary least square regression ols.lm <- lm(change~ family + cogn, data=ano) summary(ols.lm) ### Data dataList <- list(y=ano$change, x1=ano$family, x2=ano$cogn, N=length(ano$change), sdY = sd(ano$change) ) ModelLm2 <- "model { ## Likelihood for (i in 1:N){ y[i] ~ dnorm(mu[i] , tau) mu[i] <- b0 + b1*x1[i] + b2*x2[i] yhat[i] <- b0 + b1*x1[i] + b2*x2[i] res[i] <- y[i] - mu[i] emp.new[i] ~ dnorm(mu[i],tau) res.new[i] <- emp.new[i] - mu[i] } ## Priors b0 ~ dnorm(0 , 1/(10*sdY^2) ) b1 ~ dnorm(0 , 1/(10*sdY^2) ) b2 ~ dnorm(0 , 1/(10*sdY^2) ) sigma ~ dunif(0, 1E+4 ) tau <- 1/sigma^2 ## Derived paramters fit <- sum(res[]) fit.new <- sum(res.new[]) }" writeLines(ModelLm2, con="ModelLm2.txt") b0Init = mean(ano$change) b1Init = 0 b2Init = 0 sigmaInit = sd(ano$change) initsList = list(b0=b0Init, b1=b1Init, b2=b2Init, sigma=sigmaInit ) jagsModelLm2 <- jags.model(file="ModelLm2.txt", # compiles and intializes model data=dataList, inits=initsList, n.chains=4, n.adapt=500) # adaptive phase for 500 iterations update (jagsModelLm2, n.iter=500) # burn in of 500 iterations # gets samples from all chains Samples2 <- coda.samples(jagsModelLm2, variable.names=c("b0","b1","b2","sigma","fit","fit.new"), n.iter=4000) # output summary information summary(Samples2) plot(Samples2) ## Extract fit and new.fit Samples2 <- coda.samples(jagsModelLm2, variable.names=c("b0","b1","sigma","fit","fit.new","yhat"), n.iter=4000) yhat1 <- Samples2[[1]] # From chain 1 ypred1 <- as.matrix(yhat1[ ,6:77]) ypost1 <- matrix(ypred1,ncol=1) yhat2 <- Samples2[[2]] # From chain 2 ypred2 <- as.matrix(yhat2[ ,6:77]) ypost2 <- matrix(ypred2,ncol=1) yhat3 <- Samples2[[3]] # From chain 3 ypred3 <- as.matrix(yhat3[ ,6:77]) ypost3 <- matrix(ypred3,ncol=1) yhat4 <- Samples2[[4]] # From chain 4 ypred4 <- as.matrix(yhat4[ ,6:77]) ypost4 <- matrix(ypred4,ncol=1) ypost <- cbind(ypost1,ypost2,ypost3,ypost4) par(mfrow=c(2,2)) hist(ano$change,breaks=20,main="Data") hist(ypost,breaks=20,main="Posterior Predictive") # Girls within treatement have the same yhat so we could just look # at one girl's posterior distribution s <- summary(Samples) a <- as.array(s) y1 <- Samples2[[1]] ####### I will just look at 1 chain y11 <- as.matrix(y1) yhat.rx1 <- y1.72[,8] yhat.rx2 <- y1.72[,35] yhat.rx3 <- y1.72[,72] par(mfrow=c(2,2)) hist(yhat.rx1,main="Treatment 1",breaks=20) hist(yhat.rx2,main="Treatment 2",breaks=20) hist(yhat.rx3,main="Treatment 3",breaks=20) hist(ypost,breaks=20,main="All chains, all girls") girl.rx1 <- ano[which(ano$rx==1),] girl.rx2 <- ano[which(ano$rx==2),] girl.rx3 <- ano[which(ano$rx==3),] xmean <- mean(yhat.rx1) sd <- sd(yhat.rx1) x <- yhat.rx1 hist(girl.rx1$change,main="Treatement 1", freq=FALSE,ylim=c(0,.14),breaks=20, xlab="Change in Weight",ces.axis=1.5) curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2) xmean <- mean(yhat.rx2) sd <- sd(yhat.rx2) x <- yhat.rx2 hist(girl.rx2$change,main="Treatement 2", freq=FALSE,ylim=c(0,.14),breaks=20, xlab="Change in Weight",ces.axis=1.5) curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2) xmean <- mean(yhat.rx3) sd <- sd(yhat.rx3) x <- yhat.rx3 hist(girl.rx3$change,main="Treatement 3 \n (only 9 observations)", freq=FALSE,ylim=c(0,.14),breaks=20, xlab="Change in Weight",ces.axis=1.5) curve(dnorm(x,xmean,sd*3), add=TRUE,col="blue",lwd=2) ## ########################################################################################### ########### runjags: Alternative way to run model and get output ############## library(runjags) # Needs inital values for each of the 4 chains: meanY = mean(ano$change) sdY = sd(ano$change) initsList = list(list("mu"=meanY, "sigma"=sdY), list("mu"=rnorm(1,2,4), "sigma"=1 ), list("mu"=rnorm(1,4,1), "sigma"=8 ), list("mu"=rnorm(1,-4,2),"sigma"=.5 ) ) initsList out.runjags <- run.jags(model=ModelLm1, monitor=c("b0","b1","sigma","dic"), data=dataList, n.chains=4, inits=initsList) print(out.runjags) ############ jagsUI: a 3rd way to run model and get output ############### library(jagsUI) out.jagsUI <- jags(data=dataList, inits=NULL, parameters.to.save=c("b0","b1","sigma","fit","fit.new"), model.file="ModelLm1.txt", n.chains=4, n.iter=2000, n.burnin=1000) print(out.jagsUI) #Posterior predictive check plot---- this should be in jagsUI pp.check(out.jagsUI, actual = 'fit', new = 'fit.new')