# Comparing 2 proportions: US Presdients 1789-1926 vs 1928-2016 # # Data file has missing values that are coded at 9999 (either uncontested # or height is unknown # # setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 590BAY\\Lectures\\2 Proportion") elections <- read.table(file="all_presidential_data.txt",header=T) elections <- na.omit(elections) # delete the ones with data missing # In the data set missing is indicated # by NA elections$oldnew <- ifelse(elections$year<1928,1,0) # create a variable used to split the data elections$oldnew <- as.factor(elections$oldnew) # new variable needs to be a factor head(elections) # look at data split.data <- split(elections,elections$oldnew) new <- split.data[1] new <- as.data.frame(new) names(new) <- c("year","winner","wcm","loser","lcm","wintaller","winlefty","oldnew") old <- split.data[2] old <- as.data.frame(old) names(old) <- c("year","winner","wcm","loser","lcm","wintaller","winlefty","oldnew") # Find the posteriors table(new$wintaller) y.new <- 16 # looking at result of table n.new <- nrow(new) a.new <- y.new + 1 # posterior is beta with a.new and b.new b.new <- n.new - y.new + 1 theta.new <- (a.new-1)/(a.new+b.new-2) table(old$wintaller) # posterior is beta with a.old and b.old y.old <- 15 n.old <- nrow(old) a.old <- y.new + 1 b.old <- n.old - y.old +1 theta.old <- (a.old-1)/(a.old+b.old-2) # Monte carlo S= 1e4 theta1 <- rbinom(S,n.new,theta.new) theta2 <- rbinom(S,n.old,theta.old) # Take look at the simulated posterior distributions par(mfrow=c(2,2)) hist(theta1,main="1e4 samples from posterior 1928-2016") hist(theta2,main="1e4 samples from posterior 1879-1924") plot(theta1,theta2,main="scatter plot of simulated values") plot.new( ) text(.5,.5,"r(theta1,theta2)=.0035") text(.49,.4,"Pr(theta1>theta2)~.5") # Find proportion/probability of times that theta1 > theta2 mean(theta1>theta2) # Another way... new.greater <- ifelse(theta1>theta2,1,0) table(new.greater)