# Edps/Psych/Soc 589 # Fall 2018 # C.J.Anderson # # Answer key homework 2 # library(vcd) library(vcdExtra) ########################################################### #problem 1: 2.19 in Agresti (2007) # ########################################################### # enter data var.levels <- expand.grid(party=c("democrat","independent","republican"), race=c("white","black")) (party <- data.frame(var.levels,count=c(871, 444, 873, 302, 80, 43)) ) # 1(a) # -- make table object from data frame ( party.tab <- xtabs(count ~ race + party, data=party) ) # -- compute test statistics assocstats(party.tab) # 1(b) # -- fit independence model ( model.0 <- glm(count ~ race + party, data=party, family=poisson)) # -- adjusted standarized residuals. I put them into data frame to help. party$rstd <- rstandard(model.0, type="pearson") # this gets the Haberman residuals party # -- in table form sometimes help to spot patterns and useful for interpretation xtabs(rstd ~ race + party, data=party) # 1(c) # There are a number of ways to parition. Based on residuals, I think # association is mostly due to democrats vs republicans. # - Table 1: democrats vs republicans # ---- gets G2 for simpler model (g2.table1 <- update(model.0,subset=(race=="black" | race=="white") & (party=="democrat" | party=="republican"))$deviance) # ---- p-value (p <- 1-pchisq(fit1,1) ) # ---- odds ratio (odds.ratio <- (871*43)/(302*873)) # ....or 1/odds.ratio # - Table 2: democrats & republicans vs independents # ---- new variable party$select <- rep(c("demo & repub","independent","demo & repub"),2) # ---- sum over selected subTable2 <- aggregate(party$count, by=list(select=party$select,race=party$race),sum) # ---- as a table xtabs(x ~ race + select, data=subTable2) # ---- fit model with further sub-setting and ask for deviance, data=schizo.sub2, subset=(scho (fit2<- glm(x ~ select+ race, family=poisson, data=subTable2)$deviance) 1-pchisq(fit2,1) (1744*80)/(346*444) ########################################################## # Problem 2: 2.27 Agresti (2007) # ########################################################## var.levels <- expand.grid(family.inc=c("low","middle","high"), educ.goals=c("someHS","HSgrad","someCollege","CollegeGrad")) data2 <- data.frame(var.levels,count=c(9, 11, 9, 44, 52, 41, 13, 23, 12, 10, 22, 27)) # ---- as a table (data2.tab <- xtabs(count ~ family.inc + educ.goals,data=data2)) # 2(a) assocstats(data2.tab) # 2(b) model.0 <- glm(count ~ family.inc + educ.goals,data=data2,family=poisson) data2$rstd <- rstandard(model.0,type="pearson") xtabs(rstd ~ family.inc + educ.goals, data=data2) # 2(c) # --- test for non-zero correlation (and other) CMHtest(data2.tab, strata=NULL, rscores=1:3, cscores=1:4) # --- solving M2=r2*(n-1) for r r <- sqrt(4.7489/(sum(data2$count)-1) ) ###################################################### # Problem 3 : coffee consumption and heart disease # ###################################################### var.levels <- expand.grid(coffee=c("heavy","not.heavy"), chd=c("yes","no")) heart <- data.frame(var.levels, count=c(40,26,35,50)) (heart.tab <-xtabs(count~ coffee + chd, data=heart)) assocstats(heart.tab) # --- you can select elements of the table (odds.ratio <- (heart.tab[1,1]*heart.tab[2,2])/(heart.tab[2,1]*heart.tab[1,2]) )