# Edps/Psych/Soc 589
# Fall 2018
# C.J.Anderson
#
# Answer key homework 2
#

library(vcd)    
library(vcdExtra)
###########################################################
#problem 1:  2.19 in Agresti (2007)                       #
###########################################################

# enter data
var.levels <- expand.grid(party=c("democrat","independent","republican"),
                        race=c("white","black"))
(party <- data.frame(var.levels,count=c(871, 444, 873, 302, 80, 43)) )

# 1(a)
# -- make table object from data frame
( party.tab <- xtabs(count ~ race + party, data=party) )
# -- compute test statistics
assocstats(party.tab)

# 1(b)
# -- fit independence model 
( model.0 <- glm(count ~ race + party, data=party, family=poisson))
# -- adjusted standarized residuals. I put them into data frame to help.
party$rstd <- rstandard(model.0, type="pearson")  # this gets the Haberman residuals
party
# -- in table form sometimes help to spot patterns and useful for interpretation
xtabs(rstd ~ race + party, data=party)

# 1(c)
# There are a number of ways to parition.  Based on residuals, I think
#   association is mostly due to democrats vs republicans.

# - Table 1: democrats vs republicans
# ---- gets G2 for simpler model 
(g2.table1 <- update(model.0,subset=(race=="black" | race=="white") &
             (party=="democrat" | party=="republican"))$deviance)
# ---- p-value	
(p <- 1-pchisq(fit1,1) )
# ---- odds ratio
(odds.ratio <- (871*43)/(302*873))
# ....or 
1/odds.ratio
	
# - Table 2:  democrats & republicans vs independents	
# ----  new variable 
party$select <- rep(c("demo & repub","independent","demo & repub"),2)
# ---- sum over selected 
subTable2 <- aggregate(party$count, by=list(select=party$select,race=party$race),sum)
# ---- as a table
xtabs(x ~ race + select, data=subTable2)
# ---- fit model with further sub-setting and ask for deviance, data=schizo.sub2, subset=(scho
(fit2<- glm(x ~ select+ race, family=poisson, data=subTable2)$deviance)
1-pchisq(fit2,1)
(1744*80)/(346*444)

##########################################################
# Problem 2:  2.27 Agresti (2007)                        #
##########################################################
var.levels <- expand.grid(family.inc=c("low","middle","high"),
                          educ.goals=c("someHS","HSgrad","someCollege","CollegeGrad"))
data2 <- data.frame(var.levels,count=c(9, 11, 9,  44, 52, 41,  13, 23, 12,  10, 22, 27))						  
# ---- as a table
(data2.tab <- xtabs(count ~ family.inc + educ.goals,data=data2))

# 2(a)
assocstats(data2.tab)

# 2(b)
model.0 <- glm(count ~ family.inc + educ.goals,data=data2,family=poisson) 
data2$rstd <- rstandard(model.0,type="pearson")
xtabs(rstd ~ family.inc + educ.goals, data=data2)

# 2(c)
# --- test for non-zero correlation (and other)
CMHtest(data2.tab,
        strata=NULL,
		rscores=1:3,  
		cscores=1:4)
		
# --- solving M2=r2*(n-1) for r
r <- sqrt(4.7489/(sum(data2$count)-1)	)	

######################################################
# Problem 3 : coffee consumption and heart disease   #
######################################################
var.levels <- expand.grid(coffee=c("heavy","not.heavy"),
                          chd=c("yes","no"))
						  
heart <- data.frame(var.levels, count=c(40,26,35,50))
	
(heart.tab <-xtabs(count~ coffee + chd, data=heart))

assocstats(heart.tab)

# --- you can select elements of the table	  
(odds.ratio <- (heart.tab[1,1]*heart.tab[2,2])/(heart.tab[2,1]*heart.tab[1,2]) )