# 3-way table: marignal dependence, conditional independence (blue colar worker) # Edps 589 # Fall 2018 # C.J.Anderson # # This first part of this script is from a previous lecture # The 2nd part is new for lecture on log-linear models # ############################################################## # Set up # ############################################################## library(vcd) library(vcdExtra) library(MASS) library(DescTools) library(lawstat) var.values <- expand.grid(worker=c("low","high"), superv=c("low","high"), manager=c("bad","good")) counts <- c(103, 87, 32, 42, 59, 109, 78, 205) bcolar <- cbind(var.values,counts) bcolar ############################################################## # Part 1 # ############################################################## # 2-way marginal table worker.by.superv <- xtabs(counts ~ worker+superv,data=bcolar) addmargins(worker.by.superv) # 2-way marginal analysis: assocstats(worker.by.superv) # Compute G2 and X2 ( OR <- oddsratio(worker.by.superv) ) # Compute odds ratio and view summary(OR) # signficant test for odds ratio -- don't really # need because have X2 and G2 from assocstats() exp(0.6183896) # Convert log odds ratio to odds ratio exp(confint(OR) ) # Confidnece interval of odds ratio # 3-way Table of data # -- partial or conditional tables bcolar.tab <- xtabs(counts ~ worker+superv+manager,data=bcolar) addmargins(bcolar.tab) # 3-way contitional/partial table analysis assocstats(bcolar.tab) # Compute X2 and G2 ( OR <- oddsratio(bcolar.tab) ) # Compute odds ratio & view ( sum.or <-summary(OR) ) # Signficant test for odds ratios exp(sum.or[1:2]) exp( confint(OR) ) # Confidence interval of odds ratios # The set of tests: # First: Test for homogenity (shouldn't be significant...p should = 1 WoolfTest(bcolar.tab) # Brewsow-Day BreslowDayTest(bcolor.tab, OR = NA, correct = FALSE) # If reatain, then Test for conditional independence CMHtest(bcolar.tab) # If data are conditionally independent, estimate their Common odds ratio mantelhaen.test(bcolar.tab,alternative = c("two.sided"), correct = FALSE, exact = FALSE, conf.level = 0.95) # From lawstat package: test and common odds ratio cmh.test(bcolar.tab) ###################################################################### # Part 2: Log-linear models # ###################################################################### (bc.tab <- xtabs(counts ~ manager+superv+worker,data=bcolar) ) (bc.data <- as.data.frame(bc.tab)) # # Complete independence: M,S,W # summary(mod1 <- glm(Freq ~ manager+superv+worker,data=bc.data,family=poisson)) (X2.1 <- sum(residuals(mod1,type=c("pearson"))**2)) bc.data$fit1 <- mod1$fitted # # Joint independence: # summary(mod2a <- glm(Freq ~ manager+superv+worker+ manager*superv,data=bc.data,family=poisson)) (X2.2a <- sum(residuals(mod2a,type=c("pearson"))**2)) bc.data$fit2a <- mod2a$fitted summary(mod2b <- glm(Freq ~ manager+superv+worker+ manager*worker,data=bc.data,family=poisson)) (X2.2b <- sum(residuals(mod2b,type=c("pearson"))**2)) bc.data$fit2b <- mod2b$fitted summary(mod2c <- glm(Freq ~ manager+superv+worker+ superv*worker,data=bc.data,family=poisson)) (X2.2c <- sum(residuals(mod2c,type=c("pearson"))**2)) bc.data$fit2c <- mod2c$fitted # # Conditional independence: # summary(mod3a <- glm(Freq ~ manager+superv+worker+ manager*superv + manager*worker,data=bc.data,family=poisson)) (X2.3a <- sum(residuals(mod3a,type=c("pearson"))**2)) bc.data$fit3a <- mod3a$fitted bc.data$stdres3a <- rstandard(mod3a,type="pearson") summary(mod3b <- glm(Freq ~ manager+superv+worker+ superv*worker + manager*worker,data=bc.data,family=poisson)) (X2.3b <- sum(residuals(mod3b,type=c("pearson"))**2)) bc.data$fit3b <- mod3b$fitted summary(mod3c <- glm(Freq ~ manager+superv+worker+ superv*worker + manager*superv,data=bc.data,family=poisson)) (X2.3c <- sum(residuals(mod3c,type=c("pearson"))**2)) bc.data$fit3c <- mod3c$fitted # # Homogeneous Association: MS,MW,SW # summary(mod4 <- glm(Freq ~ manager+superv+worker+ manager*superv + manager*worker+ superv*worker,data=bc.data,family=poisson)) (X2.4 <- sum(residuals(mod4,type=c("pearson"))**2)) bc.data$fit4 <- mod4$fitted bc.data$stdres4 <- rstandard(mod4,type="pearson") bc.data # # Tests of partial association # three.a <- anova(mod3a,mod4) # Gets test statistic,...and for the pvalue: 1-pchisq(three.a[2,4],three.a[2,1]) # you can put the actual numbers in from # the output, or you can use this (for tables # with 2 lines (residual and one effect) three.b <- anova(mod3b,mod4) 1-pchisq(three.a[2,4],three.a[2,1]) three.c <- anova(mod3c,mod4) 1-pchisq(three.c[2,4],three.c[2,1]) # # Dissimilarity Index # # I need sample size for a few things, so just compute once and use it N <- sum(bc.data$Freq) (D.mod3a <- sum(abs(bc.data$Freq - mod3a$fitted)/(2*N))) (D.mod4 <- sum(abs(bc.data$Freq - mod4$fitted)/(2*N))) # # Correlations # cor(bc.data$Freq,mod3a$fitted) cor(bc.data$Freq,mod4$fitted) # # BIC (AIC is given in glm summary) # mod3a$deviance - mod3a$df.residual* log(N) mod4$deviance - mod4$df.residual* log(N)