#  Metropolis algorithm for mu of normal with sigma2 and tau known
#  Edps 590BAY
#  Fall 2019
#  C.J.Anderson
#
#  Alogithm works for any size sample size since I used
#   log of terms in the "ratio" and compute log.ratio.
#  
#
# Input:
#   niter   = number of iterations
#   y       = data
#   current = starting value for mean
#   tau     = standarid deviation of mean
#   jump    = the standard deviation of proposal distribution
#
# Output:
#   x = estimates of posterior distribution of mu|y
#   iter.set = for how many iterations you want
#   sample statistics and bayesian estimates
#   plot of iteration by x (to see random walk)
#   histrogram of x
#
#
######################################################################################
# Start Function
######################################################################################
# If want to pause before next iteration active following line
#
par(ask=F)

metroLogNorm <- function(niter, y, current, tau, jump) {

               x <- matrix(c(current,0),nrow=1,ncol=2)       # save the results of random walk 
               iteration <- matrix(1,nrow=1,ncol=1)          # to plot accepted values on y-axis }
               n <- length(y)
               
               mu <- mean(y)
               sigma <- sd(y)

               for (i in 2:niter) {
                
                 propose <- rnorm(1,mean=x[i-1,1],sd=jump)
 
                 ln.ratio <- sum(dnorm(y, propose, sigma, log=T)) + dnorm(propose, mu, tau, log=T) - sum(dnorm(y, current, sigma, log=T)) - dnorm(current, mu, tau, log=T) 

                  u <- log( runif(1,0,1)   )
                  if (u <= ln.ratio) {
                     current <- propose
                     accept <- 1 
                  } else {
                     accept <- 0
                  }
            
                 x <- rbind(x,c(current,accept))

                 iteration <- c(iteration,i)          

                par(mfrow=c(2,1))
                plot(iteration,x[,1],type="l",xlim=c(0,niter),ylim=c(-5, 5),main="The random walk")
                hist(x[,1],main="histogram of walk",xlim=c(-5,5),breaks=20)
                }
            return(x)
             }
#########################################################################################
# End function
########################################################################################

#########################################################
#                                                       #
# Set iter.set to whatever you want and all examples    #
#  work OK.                                             #
#   This is how many samples you want                   #
#########################################################

iter.set <- 1000

#########################################################
# Need some data                                        #
#########################################################
y <- rnorm(100,0,2)

#######################################################
# With different starting points---check chains merge #
#######################################################

sim.n4 <- metroLogNorm(iter.set,y,-4,0.4,.3)
sim.0  <- metroLogNorm(iter.set,y, 0,0.4,.3)
sim.p4 <- metroLogNorm(iter.set,y, 4,0.4,.3)
sim.0j5  <- metroLogNorm(iter.set,y, 4,0.4,.5)
			
##########################################################		  
# How close are estimates to data values and each other? #
##########################################################
n <- length(y)
ybar <- mean(y)
sdy   <- sd(y)
msim.n4 <- mean(sim.n4[200:iter.set,1])               # means of each run
msim.0  <- mean(sim.0[200:iter.set,1]) 
msim.p4 <- mean(sim.p4[200:iter.set,1]) 
means <- c(ybar,msim.n4,msim.0,msim.p4)
names(means) <- c("Ybar", "sim.n4", "sim.0", "sim.p4")
means


# What is variability (standard deviations) within chains?
sdsim.n4 <- sd(sim.n4[200:iter.set,1])                # sd within chains
sdsim.0  <- sd(sim.0[200:iter.set,1]) 
sdsim.p4 <- sd(sim.p4[200:iter.set,1])
stdDev <- c(sdsim.n4,sdsim.0,sdsim.p4) 
names(stdDev) <- c("sdsim.n4","sdsim.0","sdsim.p4")
stdDev


########################################################################
# Illustrate Burn-in, mixing of chains, and convergence (stationary)
########################################################################

#
# first half of samples
#
par(mfrow=c(1,1))
mid.iter <- iter.set /2
iter.x <- seq(1:mid.iter)
early <- plot(iter.x,sim.n4[iter.x,1],type='l',
         main='Early iterations (maxiter=iter, tau=0.4, jump=0.3)',
         xlab="Iteration",
         ylab="Simulated Value", 
         ylim=c(-4.5,4.5),
         lwd=2,
         col="blue")
lines(iter.x,sim.0[iter.x,1], type="l", lwd=2, col="red")
lines(iter.x,sim.p4[iter.x,1], type='l', lwd=2, col="green")
legend("bottomright",  title="Staring value",
  legend = c("-4,", "0", "4"),  col = c("blue","red","green"),
  text.col = c("blue","red","green"),  lty=c(1,1),  bty = "y",  cex = 1.0)

#
# Later half of samples
#
par(mfrow=c(1,1))
mid.iter <- iter.set/2 +1
iter.x <- seq(from=mid.iter, to=iter.set)
plot(iter.x,sim.n4[iter.x,1],type='l',
         main='Later iterations (maxiter=1000), tau=0.4, jump=0.3)',
         xlab="Iteration",
         ylab="Simulated Value", 
         ylim=c(-4.5,4.5),
         lwd=2,
         col="blue")
lines(iter.x,sim.0[iter.x,1], type="l", lwd=2, col="red")
lines(iter.x,sim.p4[iter.x,1], type='l', lwd=2, col="green")
legend("bottomright",
  title="Staring value",  legend = c("-4,", "0", "4"),  
  col = c("blue","red","green"),  text.col = c("blue","red","green"),  
  lty=c(1,1),  bty = "y",  cex = 1.0)
  
#####################
# Auto correlations
#####################
par(mfrow=c(2,2))
acf(sim.n4[,1], lag.max = iter.set, plot = TRUE,main="mu_0 =-4.0")   
acf(sim.0[,1],  lag.max = iter.set, plot = TRUE,main="mu_0 = 0.0")   
acf(sim.p4[,1], lag.max = iter.set, plot = TRUE,main="mu_0 = 4.0")   
acf(sim.0j5[,1],  lag.max = iter.set, plot = TRUE,main="mu_0 = 0.0 & jump=0.5")   


#######################################
# Overlay normal on histograms on 
# posterior distributions         
#####################################
     
par(mfrow=c(2,2))

hist(sim.n4[,1],main='S=1000, start=-4, tau=0.5, jump=.3',freq=FALSE,xlim=c(-4,4),breaks=20)
x <- seq(from=-4, to=4,length.out=5000)
curve(dnorm(x,msim.n4,sdsim.n4),  add=TRUE, col='blue')
#curve(dnorm(x,msim.n4,sqrt(sdsim.n4)),  add=TRUE, col='blue')
#curve(dnorm(x,msim.n4,.2),  add=TRUE, col='blue')
x <- sim.n4[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


hist(sim.0[,1],main='S=1000, start=0, tau=0.5, jump=.3',freq=FALSE,xlim=c(-4,4),breaks=20)
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x,msim.0,sdsim.0),  add=TRUE, col='blue')
x <- sim.0[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


hist(sim.p4[,1],main='S=1000, start= 4, tau=0.5, jump=.3',freq=FALSE,xlim=c(-4,4),breaks=20)
x <- seq(from=-4, to=4,length.out=1000)
curve(dnorm(x,msim.p4,sdsim.p4),  add=TRUE, col='blue')
x <- sim.0[,1]
lines(density(x,na.rm=T),col="red",lwd=2)


############################################################
# Impact of standard deviation of jump distribution
############################################################
sim.j0  <- metroLogNorm(iter.set,y, 4,0.4,.1)
sim.j1  <- metroLogNorm(iter.set,y, 4,0.4,.3)
sim.j2  <- metroLogNorm(iter.set,y, 4,0.4,.5)
sim.j3  <- metroLogNorm(iter.set,y, 4,0.4,1.0)

# How close are estimates to data values and each other?
n <- length(y)
ybar <- mean(y)
sdy   <- sd(y)
msim.j0  <- mean(sim.j0[200:iter.set,1])               # means of each run
msim.j1  <- mean(sim.j1[200:iter.set,1]) 
msim.j2  <- mean(sim.j2[200:iter.set,1]) 
msim.j3  <- mean(sim.j3[200:iter.set,1]) 
means.j  <- c(ybar,msim.j0,msim.j1,msim.j2,msim.j3)
names(means.j) <- c("Ybar", "sim.j0", "sim.j1", "sim.j2", "sim.j3")
means.j


# What is variability (standard deviations) within chains?
sdsim.j0 <- sd(sim.j0[200:iter.set,1])                # sd within chains
sdsim.j1 <- sd(sim.j1[200:iter.set,1]) 
sdsim.j2 <- sd(sim.j2[200:iter.set,1])
sdsim.j3 <- sd(sim.j3[200:iter.set,1]) 
stdDev.j <- c(sdsim.j0,sdsim.j1,sdsim.j2,sdsim.j3) 
names(stdDev.j) <- c("sdsim.j0","sdsim.j1","sdsim.j2","sdsim.j3")
stdDev.j


#####################
# Auto correlations
#####################
par(mfrow=c(2,2))
acf(sim.j0[,1], lag.max = iter.set, plot = TRUE,main="jump=0.1")   
acf(sim.j1[,1],  lag.max = iter.set, plot = TRUE,main="jump=0.3")   
acf(sim.j2[,1], lag.max = iter.set, plot = TRUE,main="jump=0.5")   
acf(sim.j3[,1],  lag.max = iter.set, plot = TRUE,main="jump=1.0")