# Edps 590BAY # Fall 2019 # C.J.Anderson # # To tune I tried different jump std deviation until I got acceptence # rate to about 40% # library(HDInterval) library(MCMCpack) setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\5 MCMC") #setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\3 Normal") # laptop #setwd("C:\\Users\\Carolyn\\Dropbox\\edps 590BAY\\Lectures\\3 Normal") anorex <- read.table("anorexia_data.txt",header=TRUE) # create change anorex$change <- anorex$weight2 - anorex$weight1 ############################################# # Sample statistics ############################################# n=nrow(anorex) ybar = mean(anorex$change) s2 = var(anorex$change) stderr <- sqrt(s2/n) sample.stat <- c(n,ybar,s2,stderr) names(sample.stat) <- c("n","ybar","s2","stderr") sample.stat ###################################################### # From file "metro_log_with_examples_works.txt" read # function metroLogNorm into console ###################################################### # # Change jump std until find accept. Note that tuner[,2]=1 # for acceptance= for rejection # tuner <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,1.75) table(tuner[,2]) # # Easier to set than change everywhere # iter.set <- 5000 ################################################################# # Some tuning: Try different jump sd ################################################################# chain1 <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,0.3) chain2 <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,1.0) chain3 <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,1.75) chain4 <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,2.0) # # What are acceptance rates # n1<- table(chain1[,2]) ( a1 <- n1[2]/1000 * 100) n2 <- table(chain2[,2]) ( a2 <- n2[2]/1000 * 100) n3 <- table(chain3[,2]) ( a3 <- n3[2]/1000 * 100) n4 <-table(chain4[,2]) ( a4 <- n4[2]/1000 * 100) # # All samples and chains # par(mfrow=c(2,2)) x <- seq(1:1000) plot(x,chain1[,1],type="l",col="blue",lwd=2, main="jump sd=0.3, acceptance=85.1%", xlab="Iteration (sample)", ylab="Value of parameter (mean)", ylim=c(-4.5,5) ) plot(x,chain2[,1],type="l",col="blue",lwd=2, main="jump sd=1.00, acceptance=59.8%", xlab="Iteration (sample)", ylab="Value of parameter (mean)", ylim=c(-4.5,5) ) plot(x,chain3[,1],type="l",col="blue",lwd=2, main="jump sd=1.75, acceptance=39.8%", xlab="Iteration (sample)", ylab="Value of parameter (mean)", ylim=c(-4.5,5) ) plot(x,chain4[,1],type="l",col="blue",lwd=2, main="jump sd=2.0 acceptance=39.2%", xlab="Iteration (sample)", ylab="Value of parameter (mean)", ylim=c(-4.5,5) ) #################################################################### # Run chains #################################################################### chain1 <- metroLogNorm(niter=iter.set,anorex$change,ybar,stderr,1.75) chain2 <- metroLogNorm(niter=iter.set,anorex$change,0,stderr,1.75) chain3 <- metroLogNorm(niter=iter.set,anorex$change,-4,stderr,1.75) chain4 <- metroLogNorm(niter=iter.set,anorex$change,4,stderr,1.75) # # What are acceptance rates # table(chain1[,2]) n1 <-table(chain1[,2]) n1[2]/iter.set * 100 table(chain2[,2]) n2 <-table(chain2[,2]) n2[2]/iter.set * 100 table(chain3[,2]) n3 <-table(chain3[,2]) n3[2]/iter.set * 100 table(chain4[,2]) n4 <-table(chain4[,2]) n4[2]/iter.set * 100 # # First 20 iterations/samples # par(mfrow=c(1,1)) first <- seq(1:20) plot(first,chain1[first,1],type="l",col="blue",lwd=2, main="Early iterations", xlab="Iteration (sample)", ylab="Value of parameter (mean)", ylim=c(-4.5,5) ) lines(first,chain2[first,1],col="red",lwd=2) lines(first,chain3[first,1],col="green",lwd=2) lines(first,chain4[first,1],col="purple",lwd=2) # # All samples and chains # par(mfrow=c(1,1)) all <- seq(1:length(chain1[,1])) plot(all,chain1[,1],type="l",col="blue",lwd=2, main="All Samples", xlab="Iteration (sample)", ylab="Value of parameter (mean)", ylim=c(-4.5,5) ) lines(all,chain2[,1],col="red",lwd=2) lines(all,chain3[,1],col="green",lwd=2) lines(all,chain4[,1],col="purple",lwd=2) # # Use 1000-5000 # ch1 <- chain1[1000:5000,] ch2 <- chain2[1000:5000,] ch3 <- chain3[1000:5000,] ch4 <- chain4[1000:5000,] # # Combine distributions to get posterior distribution # post.dist <- rbind(ch1,ch2,ch3, ch4) mean1 <- mean(ch1[,1]) mean2 <- mean(ch2[,1]) mean3 <- mean(ch3[,1]) mean4 <- mean(ch4[,1]) mean.post <- mean(post.dist[,1]) (means <- c(mean.post, mean1,mean2,mean3,mean4)) summary(ch1[,1]) summary(ch2[,1]) summary(ch3[,1]) summary(ch4[,1]) summary(post.dist[,1]) # # Auto-correlations # par(mfrow=c(2,2)) acf(ch1[,1], lag.max =iter.set, plot = TRUE) acf(ch2[,1], lag.max =iter.set, plot = TRUE) acf(ch3[,1], lag.max =iter.set, plot = TRUE) acf(ch4[,1], lag.max =iter.set, plot = TRUE) par(mfrow=c(1,1)) acf(post.dist[,1], lag.max =iter.set, plot = TRUE) # # Densities # std <- sd(post.dist[,1]) par(mfrow=c(2,2)) hist(ch1[,1],main='chain1',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue') x <- ch1[,1] lines(density(x,na.rm=T),col="red",lwd=2) hist(ch2[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue') x <- ch2[,1] lines(density(x,na.rm=T),col="red",lwd=2) hist(ch3[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue') x <- ch3[,1] lines(density(x,na.rm=T),col="red",lwd=2) hist(ch4[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue') x <- ch4[,1] lines(density(x,na.rm=T),col="red",lwd=2) par(mfrow=c(1,1)) hist(post.dist[,1],main='Posterior Distribution',freq=FALSE,breaks=20,xlab="Estimated Mean") x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x, mean.post, std), add=TRUE, col='blue',lwd=2) x <- post.dist[,1] lines(density(x,na.rm=T),col="red",lwd=2)