# Metropolis algorithm for mu of normal with sigma2 and tau known # Edps 590BAY # Fall 2019 # C.J.Anderson # # Alogithm works for any size sample size since I used # log of terms in the "ratio" and compute log.ratio. # # # Input: # niter = number of iterations # y = data # current = starting value for mean # tau = standarid deviation of mean # jump = the standard deviation of proposal distribution # # Output: # x = estimates of posterior distribution of mu|y # iter.set = for how many iterations you want # sample statistics and bayesian estimates # plot of iteration by x (to see random walk) # histrogram of x # # ###################################################################################### # Start Function ###################################################################################### # If want to pause before next iteration active following line # par(ask=F) metroLogNorm <- function(niter, y, current, tau, jump) { x <- matrix(c(current,0),nrow=1,ncol=2) # save the results of random walk iteration <- matrix(1,nrow=1,ncol=1) # to plot accepted values on y-axis } n <- length(y) mu <- mean(y) sigma <- sd(y) for (i in 2:niter) { propose <- rnorm(1,mean=x[i-1,1],sd=jump) ln.ratio <- sum(dnorm(y, propose, sigma, log=T)) + dnorm(propose, mu, tau, log=T) - sum(dnorm(y, current, sigma, log=T)) - dnorm(current, mu, tau, log=T) u <- log( runif(1,0,1) ) if (u <= ln.ratio) { current <- propose accept <- 1 } else { accept <- 0 } x <- rbind(x,c(current,accept)) iteration <- c(iteration,i) par(mfrow=c(2,1)) plot(iteration,x[,1],type="l",xlim=c(0,niter),ylim=c(-5, 5),main="The random walk") hist(x[,1],main="histogram of walk",xlim=c(-5,5),breaks=20) } return(x) } ######################################################################################### # End function ######################################################################################## ######################################################### # # # Set iter.set to whatever you want and all examples # # work OK. # # This is how many samples you want # ######################################################### iter.set <- 1000 ######################################################### # Need some data # ######################################################### y <- rnorm(100,0,2) ####################################################### # With different starting points---check chains merge # ####################################################### sim.n4 <- metroLogNorm(iter.set,y,-4,0.4,.3) sim.0 <- metroLogNorm(iter.set,y, 0,0.4,.3) sim.p4 <- metroLogNorm(iter.set,y, 4,0.4,.3) sim.0j5 <- metroLogNorm(iter.set,y, 4,0.4,.5) ########################################################## # How close are estimates to data values and each other? # ########################################################## n <- length(y) ybar <- mean(y) sdy <- sd(y) msim.n4 <- mean(sim.n4[200:iter.set,1]) # means of each run msim.0 <- mean(sim.0[200:iter.set,1]) msim.p4 <- mean(sim.p4[200:iter.set,1]) means <- c(ybar,msim.n4,msim.0,msim.p4) names(means) <- c("Ybar", "sim.n4", "sim.0", "sim.p4") means # What is variability (standard deviations) within chains? sdsim.n4 <- sd(sim.n4[200:iter.set,1]) # sd within chains sdsim.0 <- sd(sim.0[200:iter.set,1]) sdsim.p4 <- sd(sim.p4[200:iter.set,1]) stdDev <- c(sdsim.n4,sdsim.0,sdsim.p4) names(stdDev) <- c("sdsim.n4","sdsim.0","sdsim.p4") stdDev ######################################################################## # Illustrate Burn-in, mixing of chains, and convergence (stationary) ######################################################################## # # first half of samples # par(mfrow=c(1,1)) mid.iter <- iter.set /2 iter.x <- seq(1:mid.iter) early <- plot(iter.x,sim.n4[iter.x,1],type='l', main='Early iterations (maxiter=iter, tau=0.4, jump=0.3)', xlab="Iteration", ylab="Simulated Value", ylim=c(-4.5,4.5), lwd=2, col="blue") lines(iter.x,sim.0[iter.x,1], type="l", lwd=2, col="red") lines(iter.x,sim.p4[iter.x,1], type='l', lwd=2, col="green") legend("bottomright", title="Staring value", legend = c("-4,", "0", "4"), col = c("blue","red","green"), text.col = c("blue","red","green"), lty=c(1,1), bty = "y", cex = 1.0) # # Later half of samples # par(mfrow=c(1,1)) mid.iter <- iter.set/2 +1 iter.x <- seq(from=mid.iter, to=iter.set) plot(iter.x,sim.n4[iter.x,1],type='l', main='Later iterations (maxiter=1000), tau=0.4, jump=0.3)', xlab="Iteration", ylab="Simulated Value", ylim=c(-4.5,4.5), lwd=2, col="blue") lines(iter.x,sim.0[iter.x,1], type="l", lwd=2, col="red") lines(iter.x,sim.p4[iter.x,1], type='l', lwd=2, col="green") legend("bottomright", title="Staring value", legend = c("-4,", "0", "4"), col = c("blue","red","green"), text.col = c("blue","red","green"), lty=c(1,1), bty = "y", cex = 1.0) ##################### # Auto correlations ##################### par(mfrow=c(2,2)) acf(sim.n4[,1], lag.max = iter.set, plot = TRUE,main="mu_0 =-4.0") acf(sim.0[,1], lag.max = iter.set, plot = TRUE,main="mu_0 = 0.0") acf(sim.p4[,1], lag.max = iter.set, plot = TRUE,main="mu_0 = 4.0") acf(sim.0j5[,1], lag.max = iter.set, plot = TRUE,main="mu_0 = 0.0 & jump=0.5") ####################################### # Overlay normal on histograms on # posterior distributions ##################################### par(mfrow=c(2,2)) hist(sim.n4[,1],main='S=1000, start=-4, tau=0.5, jump=.3',freq=FALSE,xlim=c(-4,4),breaks=20) x <- seq(from=-4, to=4,length.out=5000) curve(dnorm(x,msim.n4,sdsim.n4), add=TRUE, col='blue') #curve(dnorm(x,msim.n4,sqrt(sdsim.n4)), add=TRUE, col='blue') #curve(dnorm(x,msim.n4,.2), add=TRUE, col='blue') x <- sim.n4[,1] lines(density(x,na.rm=T),col="red",lwd=2) hist(sim.0[,1],main='S=1000, start=0, tau=0.5, jump=.3',freq=FALSE,xlim=c(-4,4),breaks=20) x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x,msim.0,sdsim.0), add=TRUE, col='blue') x <- sim.0[,1] lines(density(x,na.rm=T),col="red",lwd=2) hist(sim.p4[,1],main='S=1000, start= 4, tau=0.5, jump=.3',freq=FALSE,xlim=c(-4,4),breaks=20) x <- seq(from=-4, to=4,length.out=1000) curve(dnorm(x,msim.p4,sdsim.p4), add=TRUE, col='blue') x <- sim.0[,1] lines(density(x,na.rm=T),col="red",lwd=2) ############################################################ # Impact of standard deviation of jump distribution ############################################################ sim.j0 <- metroLogNorm(iter.set,y, 4,0.4,.1) sim.j1 <- metroLogNorm(iter.set,y, 4,0.4,.3) sim.j2 <- metroLogNorm(iter.set,y, 4,0.4,.5) sim.j3 <- metroLogNorm(iter.set,y, 4,0.4,1.0) # How close are estimates to data values and each other? n <- length(y) ybar <- mean(y) sdy <- sd(y) msim.j0 <- mean(sim.j0[200:iter.set,1]) # means of each run msim.j1 <- mean(sim.j1[200:iter.set,1]) msim.j2 <- mean(sim.j2[200:iter.set,1]) msim.j3 <- mean(sim.j3[200:iter.set,1]) means.j <- c(ybar,msim.j0,msim.j1,msim.j2,msim.j3) names(means.j) <- c("Ybar", "sim.j0", "sim.j1", "sim.j2", "sim.j3") means.j # What is variability (standard deviations) within chains? sdsim.j0 <- sd(sim.j0[200:iter.set,1]) # sd within chains sdsim.j1 <- sd(sim.j1[200:iter.set,1]) sdsim.j2 <- sd(sim.j2[200:iter.set,1]) sdsim.j3 <- sd(sim.j3[200:iter.set,1]) stdDev.j <- c(sdsim.j0,sdsim.j1,sdsim.j2,sdsim.j3) names(stdDev.j) <- c("sdsim.j0","sdsim.j1","sdsim.j2","sdsim.j3") stdDev.j ##################### # Auto correlations ##################### par(mfrow=c(2,2)) acf(sim.j0[,1], lag.max = iter.set, plot = TRUE,main="jump=0.1") acf(sim.j1[,1], lag.max = iter.set, plot = TRUE,main="jump=0.3") acf(sim.j2[,1], lag.max = iter.set, plot = TRUE,main="jump=0.5") acf(sim.j3[,1], lag.max = iter.set, plot = TRUE,main="jump=1.0")