# Edps 590BAY
# Fall 2019
# C.J.Anderson
#
# To tune I tried different jump std deviation until I got acceptence
#   rate to about 40%
#
library(HDInterval)
library(MCMCpack)

setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\5 MCMC")

#setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\3 Normal")
# laptop
#setwd("C:\\Users\\Carolyn\\Dropbox\\edps 590BAY\\Lectures\\3 Normal")

anorex <- read.table("anorexia_data.txt",header=TRUE)

# create change 
anorex$change <- anorex$weight2 - anorex$weight1

#############################################
# Sample statistics
#############################################
n=nrow(anorex)
ybar = mean(anorex$change)
s2 = var(anorex$change)
stderr <- sqrt(s2/n)
sample.stat <- c(n,ybar,s2,stderr)
names(sample.stat) <- c("n","ybar","s2","stderr")
sample.stat

######################################################
# From file "metro_log_with_examples_works.txt" read 
# function metroLogNorm into console
######################################################
#
# Change jump std until find accept.  Note that tuner[,2]=1 
# for acceptance= for rejection
#
tuner <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,1.75)
table(tuner[,2])

#
#  Easier to set than change everywhere
#
iter.set <- 5000

#################################################################
# Some tuning:  Try different jump sd
#################################################################
chain1 <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,0.3)
chain2 <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,1.0)
chain3 <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,1.75)
chain4 <- metroLogNorm(niter=1000,anorex$change,ybar,stderr,2.0)

#
# What are acceptance rates
#
n1<- table(chain1[,2])
 (  a1 <- n1[2]/1000 * 100)
n2 <- table(chain2[,2])
 (  a2 <- n2[2]/1000 * 100)
n3 <- table(chain3[,2])
  ( a3 <- n3[2]/1000 * 100)
n4 <-table(chain4[,2])
 (  a4 <- n4[2]/1000 * 100)
#
# All samples and chains
#
par(mfrow=c(2,2))
x <- seq(1:1000)
plot(x,chain1[,1],type="l",col="blue",lwd=2,
      main="jump sd=0.3, acceptance=85.1%",
      xlab="Iteration (sample)",
      ylab="Value of parameter (mean)",
      ylim=c(-4.5,5)
)

plot(x,chain2[,1],type="l",col="blue",lwd=2,
      main="jump sd=1.00, acceptance=59.8%",
      xlab="Iteration (sample)",
      ylab="Value of parameter (mean)",
      ylim=c(-4.5,5)
)

plot(x,chain3[,1],type="l",col="blue",lwd=2,
      main="jump sd=1.75, acceptance=39.8%",
      xlab="Iteration (sample)",
      ylab="Value of parameter (mean)",
      ylim=c(-4.5,5)
)

plot(x,chain4[,1],type="l",col="blue",lwd=2,
      main="jump sd=2.0 acceptance=39.2%",
      xlab="Iteration (sample)",
      ylab="Value of parameter (mean)",
      ylim=c(-4.5,5)
)
####################################################################
# Run chains
####################################################################
chain1 <- metroLogNorm(niter=iter.set,anorex$change,ybar,stderr,1.75)
chain2 <- metroLogNorm(niter=iter.set,anorex$change,0,stderr,1.75)
chain3 <- metroLogNorm(niter=iter.set,anorex$change,-4,stderr,1.75)
chain4 <- metroLogNorm(niter=iter.set,anorex$change,4,stderr,1.75)

#
# What are acceptance rates
#
table(chain1[,2])
n1 <-table(chain1[,2])
n1[2]/iter.set * 100

table(chain2[,2])
n2 <-table(chain2[,2])
n2[2]/iter.set * 100

table(chain3[,2])
n3 <-table(chain3[,2])
n3[2]/iter.set * 100

table(chain4[,2])
n4 <-table(chain4[,2])
n4[2]/iter.set * 100

#
# First 20 iterations/samples
#
par(mfrow=c(1,1))
first <- seq(1:20)
plot(first,chain1[first,1],type="l",col="blue",lwd=2,
      main="Early iterations",
      xlab="Iteration (sample)",
      ylab="Value of parameter (mean)",
      ylim=c(-4.5,5)
)
lines(first,chain2[first,1],col="red",lwd=2)
lines(first,chain3[first,1],col="green",lwd=2)
lines(first,chain4[first,1],col="purple",lwd=2)

#
# All samples and chains
#
par(mfrow=c(1,1))
all <- seq(1:length(chain1[,1]))
plot(all,chain1[,1],type="l",col="blue",lwd=2,
      main="All Samples",
      xlab="Iteration (sample)",
      ylab="Value of parameter (mean)",
      ylim=c(-4.5,5)
)
lines(all,chain2[,1],col="red",lwd=2)
lines(all,chain3[,1],col="green",lwd=2)
lines(all,chain4[,1],col="purple",lwd=2)

#
# Use 1000-5000
#
ch1 <- chain1[1000:5000,]
ch2 <- chain2[1000:5000,]
ch3 <- chain3[1000:5000,]
ch4 <- chain4[1000:5000,]

#
# Combine distributions to get posterior distribution
#
post.dist <- rbind(ch1,ch2,ch3, ch4)

mean1 <- mean(ch1[,1])
mean2 <- mean(ch2[,1])
mean3 <- mean(ch3[,1])
mean4 <- mean(ch4[,1])
mean.post <-  mean(post.dist[,1])
(means <- c(mean.post, mean1,mean2,mean3,mean4))

summary(ch1[,1])
summary(ch2[,1])
summary(ch3[,1])
summary(ch4[,1])
summary(post.dist[,1])

#
# Auto-correlations
#
par(mfrow=c(2,2))
acf(ch1[,1], lag.max =iter.set, plot = TRUE)   
acf(ch2[,1], lag.max =iter.set, plot = TRUE)   
acf(ch3[,1], lag.max =iter.set, plot = TRUE)   
acf(ch4[,1], lag.max =iter.set, plot = TRUE)   


par(mfrow=c(1,1))
acf(post.dist[,1], lag.max =iter.set, plot = TRUE)   

#
# Densities
#

std <- sd(post.dist[,1])

par(mfrow=c(2,2))
hist(ch1[,1],main='chain1',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue')
x <- ch1[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


hist(ch2[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue')
x <-  ch2[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


hist(ch3[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue')
x <-  ch3[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


hist(ch4[,1],main='chain2',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue')
x <-  ch4[,1]
lines(density(x,na.rm=T),col="red",lwd=2)




par(mfrow=c(1,1))
hist(post.dist[,1],main='Posterior Distribution',freq=FALSE,breaks=20,xlab="Estimated Mean")
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x, mean.post, std),  add=TRUE, col='blue',lwd=2)
x <- post.dist[,1]
lines(density(x,na.rm=T),col="red",lwd=2)