# edps590BAY # Fall 2019 # C.J.Anderson # # Example for binomial: Taller candidate wins US presidental # election. Taken from edps 589, original source "Chance" # but adding more elections (excluding 2016 where relationship # betwee gender & height becomes a facter # # For this example, shows case of # o Prior is beta(1,1), i.e., uniform(0,1) # o posterior is beta # o mode of posterior # o mean of posterior # o credible intervals # o HDI # # # Uses package HDInterval # library(HDInterval) # Data: taller.2012 <- 16 # number taller who won "y" shorter.2012 <- 4 # number shorter who won n <- taller.2012+shorter.2012 # total number of elections # Frequenist estimate: p.hat <- taller.2012/n p.hat # or this and get exact test binom.test(taller.2012 ,n) # # Set up for Bayesian inference using analytic results: # theta <- seq(.001,1,.001) # value of possible probabilities prior <- dbeta(theta,1,1) # same as uniform prior post <- dbeta(theta,taller.2012+1,shorter.2012+1) # posterior distribution (post.mode <- p.hat ) y.mode<- c(0,4.56) x.mode <- rep(post.mode,2) (post.mean <- 17/(17+5) ) x.mean <- rep(post.mean,2) y.mean <- c(0,4.3) post.var <- (17*5)/( (17+5+1)*((17+5**2)) ) post.sd <- sqrt(post.var) plot(theta,post,type="l",main="Posterior Distribution for Taller Candidate", ylim=c(0,5.5),col="black",xlab="Possible probabilities") lines(theta,prior,type='l',col="grey") text(0.2,1.2,"Prior: beta(1,1)") text(.8,5.4,"Posterior: beta(17,5)") lines(x.mode,y.mode,type="l",col="red") text(.87,4.9,"mode=.80",col="red") lines(x.mean,y.mean,type="l",col="purple") text(.675,4.2,"mean=.77",col="purple") # For 95% credible interval for taller theta.95 <- c(.025,.975) ci <- qbeta(theta.95,15,4) ci lower <- rep(ci[1],2) upper <- rep(ci[2],2) y.ci <- c(0,2) plot(theta,post,type="l",main="95% Credible Intervals: (.58, .94)", ylim=c(0,5.5),col="black",xlab="Possible probabilities",ylab="Posterior Density") lines(lower,y.ci,type='l') lines(upper,y.ci,type='l') # # Frequentist resuts are pretty basically same in this # example, but interpretaton of CIs is different # phat = .80 # exact p-value = .00591 # "exact" confidence interval: (.56, .94) # large sample ci: (.63,.98) # or better asympotic method (.58, .92) # # HDI inerval vs Credible Interval # inbeta <- rbeta(1E5,15,4) hdi.p <- hdi(inbeta,credMass=.95) Lhdi <- rep(hdi.p[1],2) Uhdi <- rep(hdi.p[2],2) par(mfrow=c(2,1)) plot(theta,post,type="l",main="95% Credible Interval: (.58, .94)", ylim=c(0,5.5), xlim=c(.4,1.0), col="black",xlab="Possible probabilities",ylab="Posterior Density") lines(lower,y.ci,type='l',col="blue") lines(upper,y.ci,type='l',col="blue") plot(theta,post,type="l",main="95% Highest Density Interval: (.60, .93)", ylim=c(0,5.5), xlim=c(.4,1.0), col="black",xlab="Possible probabilities") lines(Lhdi,y.ci,type='l',col="red") lines(Uhdi,y.ci,type='l',col="red")