# Ordinal linear trend test
# Edps 589
# Fall 2018
# C.J.Anderson
#
#  -- Use gss data
#  -- CMH test
#  -- Power of CMH test over general G2
#  -- Function that plots power curves and produces power table
#  -- Example of function using GSS data and modification of M2
#       to show that M2 isn't always more powerful.

#  Packages that I will probably use
library(MASS)
library(vcd)
library(vcdExtra)

# Where things will live on laptop
setwd("D:\\Dropbox\\edps 589\\2 Chi-square")

# Use GSS data
gss <- read.table("gss_data.txt",header=TRUE)
head(gss)

gss.tab <- xtabs(count ~ fechld + mapaid, data=gss)

# Cochran-Mantel-Haenszel test of association
#   (vcdExtra package)
# Note:	Can get more than just M:  
#           types=c("cor","rmeans","cmeans","general")

CMHtest(gss.tab,
        strata=NULL,
		rscores=1:4,  
		cscores=1:5,
		 types=c("cor","general")
		)
		
# using the fact that M= (n-1)r^2

n <- sum(gss.tab)
( r <- sqrt( 36.26132 /(n-1)) )

####### Power for G2 for independence (any kind of association)#######
#  Use output from CMHtest
#  Used a bit of trial to put things where I wanted them
G2 <- 47.522
df <- 12

x <- seq(from=.01, to=100, length=10000)
null.chisq <- dchisq(x,df)              # central chi-square
alt.chisq  <- dchisq(x,df,G2)           # non-centeral chi-square
plot(x,null.chisq,type='l',col='blue',
     main='Null and Alternative for General Test of Association',
	 ylab=' ')
lines(x,alt.chisq,type='l',col='red')
crit.value <- qchisq(.95,df)            # quantile for p=.05
lines(c(crit.value,crit.value),c(0,max(null.chisq)),type='l',col='black')

# p-value
x.shade <- c(crit.value,crit.value,seq(from=crit.value, to=100, by=.01),100)
y.values <- c(0,crit.value,seq(from=crit.value, to=100, by=.01),0)
y.shade <- dchisq(y.values,df)
polygon(x.shade,y.shade,col='blue')
text(30,.015,"p-value",col='blue')
arrows(30,.013,24.0,.007,length=.15,col='blue')

# Power and pointer to
y.shade2 <- dchisq(y.values,df,G2)
polygon(x.shade,y.shade2,col='red')
( power <- 1 - pchisq(crit.value,12,G2) )
text(71,0.03,"power=.9995",col="red")
arrows(72,.027,72,.019,length=0.15,col="red")

# Label critical value
text(40,.07,expression(paste("Pr(",chi[12]^2," >21.03) = .05")))

legend("topright",
       legend=c("Null (central chisq)","Alternatvive (non-centeral)"),
	   col=c("blue","red"),
	   lty=1
	   ) 

############ power for M2 (ordinal/linear association #############
M2 <-  36.261 
df <- 1
x <- seq(from=.01, to=50, length=10000)

null.M2 <- dchisq(x,df)
alt.M2 <- dchisq(x,df,M2)
plot(x,null.M2,type='l',col='blue',
     main='Null and Alternative for Linear Test of Association',
	 ylab=' ')
lines(x,alt.M2,col='red')
crit.value <- qchisq(.95,df)            # quantile for p=.05
lines(c(crit.value,crit.value),c(0,max(null.M2)),type='l',col='black')

# Power and pointer to
y.shade2 <- dchisq(y.values,df,M2)
polygon(x.shade,y.shade2,col='red')
( power <- 1 - pchisq(crit.value,12,G2) )
text(27,.75,"power ~ 1",col="red")
arrows(27,.65,27,.015,length=0.2,col="red")

text(7,3,"3.84")
text(27,1,expression(paste(M^2," = 36.261")))
legend("topright",
       legend=c("Null (central chisq)","Alternatvive (non-centeral)"),
	   col=c("blue","red"),
	   lty=1
	   ) 

############ Function "power.chisq" that I wrote ########################
#  Power curves as function of sample size 
#  **Input**
#    G2 = value of likelihood ratio test statistic (general association)
#    df.G2 = degrees of freedom for test general association
#    M2 = value of CMH (testing linear association)
#    alpha = alpha the tests
#    min.sample = smallest possible sample
#    max.sample = largest sampe to comput power for
#    n.obs = size of current sample
#
#  For retrospective, you should have G2, df.G2, M2, n.obs
#           
#  For prospective, to get  G2, df.G2, M2,  and n.obs
#  you have to come up with a plausible data set.  If you specify
#  this in terms of probabilities, then n.obs-1 and if you specify 
#  this in terms of frequencies, then n.obs is total frequency.
#.
#  alpha is typcially set to .05, but any value can be input
#  min.sample and max.sample are up to you.
#
# **Output** 
#   A nice graph of power for M2 and G2 versus sample size
#   power.table = table that contains sample size, power for G2 and 
#                 power for M2.
# 
# An example using the GSS is below
#                  

power.chisq <- function(G2,df.G2,M2,alpha,min.sample,max.sample,n.obs) {

df.M2 <- 1
N <- seq(from=min.sample,to=max.sample,by=1)

p.data <- gss$count/n.obs

( independence <- glm(count ~ mapaid + fechld,data=gss, family=poisson) )
p.G2 <- fitted(independence)/n.obs

powerG2 <- matrix(-99,nrow=length(N),ncol=1)
powerM2 <- matrix(-99,nrow=length(N),ncol=1)

wG2 <- G2/n.obs
wM2 <- M2/(n.obs-1)

crit.G2 <- qchisq(1-alpha,df.G2)
crit.M2 <- qchisq(1-alpha,df.M2)

for (sample.size in 5:1000) {
 i <- sample.size-min.sample + 1
 omegaG2 <- wG2*sample.size
 powerG2[i,1] <-  1 - pchisq(crit.G2,df.G2,omegaG2)
 omegaM2 <- wM2*sample.size
 powerM2[i,1] <- 1 -  pchisq(crit.M2,df.M2,omegaM2)
}

plot(N,powerG2,
     type="l", 
     col="blue", 
     ylim=c(0,1),
     main="Power for General (G2) and Linear Association (M2) ",
     xlab="Sample Size",
     ylab="Power"
     )
lines(N,powerM2,type='l',col="red")
legend("bottomright", legend=c("G2, General","M2, Linear"),col=c("blue","red"),lty=1)

power.table <- cbind(N,powerG2,powerM2)
return(power.table)
}

################################################
# Examples:                                    #
################################################
# Using GSS example
df.G2 <- 12
G2 <- 47.522
M2 <- 36.261
alpha <- .05
min.sample <- 5
max.sample <- 1000
n.obs <- sum(gss$count)

gss.eg2 <- power.chisq(G2=G2,M2=M2,alpha=.05,df.G2=12,min.sample=5,max.sample=1000,n.obs=n)
head(gss.eg2,n=50)

# Power can be greater if linear association is not present or very weak
( M2 <- 10 )
( r <- sqrt( M2 /(n.obs-1)) )

eg2 <- power.chisq(G2=G2,M2=M2,alpha=.05,df.G2=12,min.sample=5,max.sample=1000,n.obs=n)