# edps590BAY
# Fall 2019
# C.J.Anderson
#
# Example for binomial:  Taller candidate wins US presidental
#   election.  Taken from edps 589, original source "Chance"
#   but adding more elections (excluding 2016 where relationship
#   betwee gender & height becomes a facter
#
# For this example, shows case of 
#  o Prior is beta(1,1), i.e., uniform(0,1)
#  o posterior is beta
#  o mode of posterior
#  o mean of posterior
#  o credible intervals
#  o HDI 
#
#
# Uses package HDInterval
#
library(HDInterval)


# Data:
taller.2012 <- 16         # number taller who won    "y"
shorter.2012 <- 4         # number shorter who won  
n <- taller.2012+shorter.2012  # total number of elections


# Frequenist estimate:
p.hat <- taller.2012/n
p.hat

# or this and get exact test
binom.test(taller.2012 ,n)


#
# Set up for Bayesian inference using analytic results:
#
theta <-  seq(.001,1,.001)     # value of possible probabilities
prior <- dbeta(theta,1,1)      # same as uniform prior
post  <- dbeta(theta,taller.2012+1,shorter.2012+1) # posterior distribution

(post.mode <- p.hat )
y.mode<- c(0,4.56)
x.mode <- rep(post.mode,2)

(post.mean <- 17/(17+5) )
x.mean <- rep(post.mean,2) 
y.mean <- c(0,4.3) 
post.var <- (17*5)/( (17+5+1)*((17+5**2)) )  
post.sd <- sqrt(post.var) 

plot(theta,post,type="l",main="Posterior Distribution for Taller Candidate",
     ylim=c(0,5.5),col="black",xlab="Possible probabilities")
lines(theta,prior,type='l',col="grey")
text(0.2,1.2,"Prior: beta(1,1)")
text(.8,5.4,"Posterior: beta(17,5)")

lines(x.mode,y.mode,type="l",col="red")
text(.87,4.9,"mode=.80",col="red")
lines(x.mean,y.mean,type="l",col="purple")
text(.675,4.2,"mean=.77",col="purple")



# For 95% credible interval for taller
theta.95 <- c(.025,.975)
ci <- qbeta(theta.95,15,4)
ci
lower <- rep(ci[1],2)
upper <- rep(ci[2],2)
y.ci <- c(0,2)

plot(theta,post,type="l",main="95% Credible Intervals: (.58, .94)",
     ylim=c(0,5.5),col="black",xlab="Possible probabilities",ylab="Posterior Density")
lines(lower,y.ci,type='l')
lines(upper,y.ci,type='l')

#
# Frequentist resuts are pretty basically same in this 
#  example, but interpretaton of CIs is different
# phat = .80
# exact p-value = .00591
# "exact" confidence interval: (.56, .94)
# large sample ci: (.63,.98)
# or better asympotic method (.58, .92)
#   

# HDI inerval vs Credible Interval
#
inbeta <- rbeta(1E5,15,4)
hdi.p <- hdi(inbeta,credMass=.95)

Lhdi <- rep(hdi.p[1],2)
Uhdi <- rep(hdi.p[2],2)

par(mfrow=c(2,1))
plot(theta,post,type="l",main="95% Credible Interval: (.58, .94)",
     ylim=c(0,5.5),
     xlim=c(.4,1.0),
col="black",xlab="Possible probabilities",ylab="Posterior Density")
lines(lower,y.ci,type='l',col="blue")
lines(upper,y.ci,type='l',col="blue")

plot(theta,post,type="l",main="95% Highest Density Interval: (.60, .93)",
     ylim=c(0,5.5),
     xlim=c(.4,1.0),
col="black",xlab="Possible probabilities")
lines(Lhdi,y.ci,type='l',col="red")
lines(Uhdi,y.ci,type='l',col="red")