# 3-way table:   marignal dependence, conditional independence (blue colar worker)
# Edps 589
# Fall 2018
# C.J.Anderson
#
# This first part of this script is from a previous lecture
# The 2nd part is new for lecture on log-linear models
#
##############################################################
# Set up                                                     #
##############################################################
library(vcd)
library(vcdExtra)
library(MASS)
library(DescTools)
library(lawstat)

var.values <- expand.grid(worker=c("low","high"),
                          superv=c("low","high"),
			              manager=c("bad","good"))
counts <- c(103, 87, 32, 42, 59, 109, 78, 205)	
bcolar <- cbind(var.values,counts)
bcolar
##############################################################
#  Part 1                                                    #
##############################################################

# 2-way marginal table
worker.by.superv <- xtabs(counts ~ worker+superv,data=bcolar) 
addmargins(worker.by.superv)

# 2-way marginal analysis:  
assocstats(worker.by.superv)             # Compute G2 and X2
( OR <- oddsratio(worker.by.superv)  )   # Compute odds ratio and view
summary(OR)                              # signficant test for odds ratio -- don't really 
                                         #   need because have X2 and G2 from assocstats()
exp(0.6183896)                           # Convert log odds ratio to odds ratio
exp(confint(OR) )                        # Confidnece interval of odds ratio



# 3-way  Table of data
#  -- partial or conditional tables
bcolar.tab <- xtabs(counts ~ worker+superv+manager,data=bcolar)
addmargins(bcolar.tab)

# 3-way contitional/partial table analysis 
assocstats(bcolar.tab)                    # Compute X2 and G2
( OR <- oddsratio(bcolar.tab)  )              # Compute odds ratio & view
( sum.or <-summary(OR)    )                            # Signficant test for odds ratios
exp(sum.or[1:2])
exp( confint(OR) )                        # Confidence interval of odds ratios

# The set of tests:
# First: Test for homogenity (shouldn't be significant...p should = 1
WoolfTest(bcolar.tab)

# Brewsow-Day
BreslowDayTest(bcolor.tab, OR = NA, correct = FALSE)


# If reatain, then Test for conditional independence
CMHtest(bcolar.tab)

# If data are conditionally independent, estimate their Common odds ratio
mantelhaen.test(bcolar.tab,alternative = c("two.sided"),
                correct = FALSE, exact = FALSE, conf.level = 0.95)

# From lawstat package:  test and common odds ratio
cmh.test(bcolar.tab)



######################################################################
# Part 2: Log-linear models                                          #
######################################################################

(bc.tab <- xtabs(counts ~ manager+superv+worker,data=bcolar) )

(bc.data <- as.data.frame(bc.tab))

#
# Complete independence: M,S,W
#
summary(mod1 <- glm(Freq ~ manager+superv+worker,data=bc.data,family=poisson))
(X2.1 <- sum(residuals(mod1,type=c("pearson"))**2))
bc.data$fit1 <- mod1$fitted

#
# Joint independence: 
#
summary(mod2a <- glm(Freq ~ manager+superv+worker+ manager*superv,data=bc.data,family=poisson))
(X2.2a <- sum(residuals(mod2a,type=c("pearson"))**2))
bc.data$fit2a <- mod2a$fitted

summary(mod2b <- glm(Freq ~ manager+superv+worker+ manager*worker,data=bc.data,family=poisson))
(X2.2b <- sum(residuals(mod2b,type=c("pearson"))**2))
bc.data$fit2b <- mod2b$fitted

summary(mod2c <- glm(Freq ~ manager+superv+worker+ superv*worker,data=bc.data,family=poisson))
(X2.2c <- sum(residuals(mod2c,type=c("pearson"))**2))
bc.data$fit2c <- mod2c$fitted

#
# Conditional independence:
#
summary(mod3a <- glm(Freq ~ manager+superv+worker+ manager*superv + manager*worker,data=bc.data,family=poisson))
(X2.3a <- sum(residuals(mod3a,type=c("pearson"))**2))
bc.data$fit3a <- mod3a$fitted
bc.data$stdres3a <- rstandard(mod3a,type="pearson")

summary(mod3b <- glm(Freq ~ manager+superv+worker+ superv*worker + manager*worker,data=bc.data,family=poisson))
(X2.3b <- sum(residuals(mod3b,type=c("pearson"))**2))
bc.data$fit3b <- mod3b$fitted

summary(mod3c <- glm(Freq ~ manager+superv+worker+ superv*worker + manager*superv,data=bc.data,family=poisson))
(X2.3c <- sum(residuals(mod3c,type=c("pearson"))**2))
bc.data$fit3c <- mod3c$fitted

#
# Homogeneous Association: MS,MW,SW
#
summary(mod4 <- glm(Freq ~ manager+superv+worker+ manager*superv + manager*worker+ superv*worker,data=bc.data,family=poisson))
(X2.4 <- sum(residuals(mod4,type=c("pearson"))**2))
bc.data$fit4 <- mod4$fitted
bc.data$stdres4 <- rstandard(mod4,type="pearson")

bc.data


#
# Tests of partial association
#
three.a <- anova(mod3a,mod4)          # Gets test statistic,...and for the pvalue:
1-pchisq(three.a[2,4],three.a[2,1])   # you can put the actual numbers in from 
                                      # the output, or you can use this (for tables
									  # with 2 lines (residual and one effect)
three.b <- anova(mod3b,mod4)
1-pchisq(three.a[2,4],three.a[2,1])

three.c <- anova(mod3c,mod4)
1-pchisq(three.c[2,4],three.c[2,1])

#
# Dissimilarity Index
#
# I need sample size for a few things, so just compute once and use it
N <- sum(bc.data$Freq)

(D.mod3a <- sum(abs(bc.data$Freq - mod3a$fitted)/(2*N)))

(D.mod4 <- sum(abs(bc.data$Freq - mod4$fitted)/(2*N))) 


#
# Correlations
#
cor(bc.data$Freq,mod3a$fitted)

cor(bc.data$Freq,mod4$fitted)

#
# BIC  (AIC is given in glm summary)
#
mod3a$deviance - mod3a$df.residual* log(N)

mod4$deviance - mod4$df.residual* log(N)