# Comparing 2 proportions:  US Presdients 1789-1926 vs 1928-2016
#
# Data file has missing values that are coded at 9999 (either uncontested 
#   or height is unknown
#
#
setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 590BAY\\Lectures\\2 Proportion")

elections <- read.table(file="all_presidential_data.txt",header=T)


elections <- na.omit(elections)                      # delete the ones with data missing
                                                     # In the data set missing is indicated
                                                     # by NA

elections$oldnew <- ifelse(elections$year<1928,1,0)  # create a variable used to split the data

elections$oldnew <- as.factor(elections$oldnew)      # new variable needs to be a factor
head(elections)                                      # look at data

split.data <- split(elections,elections$oldnew)

new <- split.data[1]
new <- as.data.frame(new)
names(new) <- c("year","winner","wcm","loser","lcm","wintaller","winlefty","oldnew")

old <- split.data[2]
old <- as.data.frame(old)
names(old) <- c("year","winner","wcm","loser","lcm","wintaller","winlefty","oldnew")


# Find the posteriors
table(new$wintaller)
y.new <- 16            # looking at result of table
n.new <- nrow(new)

a.new <- y.new + 1          # posterior is beta with a.new and b.new
b.new <- n.new - y.new + 1
theta.new <- (a.new-1)/(a.new+b.new-2)   


table(old$wintaller)   # posterior is beta with a.old and b.old
y.old <- 15
n.old <- nrow(old)


a.old <- y.new + 1
b.old <- n.old - y.old +1
theta.old <- (a.old-1)/(a.old+b.old-2)   


# Monte carlo 
S= 1e4
theta1 <- rbinom(S,n.new,theta.new)
theta2 <- rbinom(S,n.old,theta.old)

# Take look at the simulated posterior distributions
par(mfrow=c(2,2))
hist(theta1,main="1e4 samples from posterior 1928-2016")
hist(theta2,main="1e4 samples from posterior 1879-1924")
plot(theta1,theta2,main="scatter plot of simulated values")
plot.new( )
text(.5,.5,"r(theta1,theta2)=.0035")
text(.49,.4,"Pr(theta1>theta2)~.5")


# Find proportion/probability of times that theta1 > theta2
mean(theta1>theta2)

# Another way...
new.greater <- ifelse(theta1>theta2,1,0)
table(new.greater)