# Ordinal linear trend test # Edps 589 # Fall 2018 # C.J.Anderson # # -- Use gss data # -- CMH test # -- Power of CMH test over general G2 # -- Function that plots power curves and produces power table # -- Example of function using GSS data and modification of M2 # to show that M2 isn't always more powerful. # Packages that I will probably use library(MASS) library(vcd) library(vcdExtra) # Where things will live on laptop setwd("D:\\Dropbox\\edps 589\\2 Chi-square") # Use GSS data gss <- read.table("gss_data.txt",header=TRUE) head(gss) gss.tab <- xtabs(count ~ fechld + mapaid, data=gss) # Cochran-Mantel-Haenszel test of association # (vcdExtra package) # Note: Can get more than just M: # types=c("cor","rmeans","cmeans","general") CMHtest(gss.tab, strata=NULL, rscores=1:4, cscores=1:5, types=c("cor","general") ) # using the fact that M= (n-1)r^2 n <- sum(gss.tab) ( r <- sqrt( 36.26132 /(n-1)) ) ####### Power for G2 for independence (any kind of association)####### # Use output from CMHtest # Used a bit of trial to put things where I wanted them G2 <- 47.522 df <- 12 x <- seq(from=.01, to=100, length=10000) null.chisq <- dchisq(x,df) # central chi-square alt.chisq <- dchisq(x,df,G2) # non-centeral chi-square plot(x,null.chisq,type='l',col='blue', main='Null and Alternative for General Test of Association', ylab=' ') lines(x,alt.chisq,type='l',col='red') crit.value <- qchisq(.95,df) # quantile for p=.05 lines(c(crit.value,crit.value),c(0,max(null.chisq)),type='l',col='black') # p-value x.shade <- c(crit.value,crit.value,seq(from=crit.value, to=100, by=.01),100) y.values <- c(0,crit.value,seq(from=crit.value, to=100, by=.01),0) y.shade <- dchisq(y.values,df) polygon(x.shade,y.shade,col='blue') text(30,.015,"p-value",col='blue') arrows(30,.013,24.0,.007,length=.15,col='blue') # Power and pointer to y.shade2 <- dchisq(y.values,df,G2) polygon(x.shade,y.shade2,col='red') ( power <- 1 - pchisq(crit.value,12,G2) ) text(71,0.03,"power=.9995",col="red") arrows(72,.027,72,.019,length=0.15,col="red") # Label critical value text(40,.07,expression(paste("Pr(",chi[12]^2," >21.03) = .05"))) legend("topright", legend=c("Null (central chisq)","Alternatvive (non-centeral)"), col=c("blue","red"), lty=1 ) ############ power for M2 (ordinal/linear association ############# M2 <- 36.261 df <- 1 x <- seq(from=.01, to=50, length=10000) null.M2 <- dchisq(x,df) alt.M2 <- dchisq(x,df,M2) plot(x,null.M2,type='l',col='blue', main='Null and Alternative for Linear Test of Association', ylab=' ') lines(x,alt.M2,col='red') crit.value <- qchisq(.95,df) # quantile for p=.05 lines(c(crit.value,crit.value),c(0,max(null.M2)),type='l',col='black') # Power and pointer to y.shade2 <- dchisq(y.values,df,M2) polygon(x.shade,y.shade2,col='red') ( power <- 1 - pchisq(crit.value,12,G2) ) text(27,.75,"power ~ 1",col="red") arrows(27,.65,27,.015,length=0.2,col="red") text(7,3,"3.84") text(27,1,expression(paste(M^2," = 36.261"))) legend("topright", legend=c("Null (central chisq)","Alternatvive (non-centeral)"), col=c("blue","red"), lty=1 ) ############ Function "power.chisq" that I wrote ######################## # Power curves as function of sample size # **Input** # G2 = value of likelihood ratio test statistic (general association) # df.G2 = degrees of freedom for test general association # M2 = value of CMH (testing linear association) # alpha = alpha the tests # min.sample = smallest possible sample # max.sample = largest sampe to comput power for # n.obs = size of current sample # # For retrospective, you should have G2, df.G2, M2, n.obs # # For prospective, to get G2, df.G2, M2, and n.obs # you have to come up with a plausible data set. If you specify # this in terms of probabilities, then n.obs-1 and if you specify # this in terms of frequencies, then n.obs is total frequency. #. # alpha is typcially set to .05, but any value can be input # min.sample and max.sample are up to you. # # **Output** # A nice graph of power for M2 and G2 versus sample size # power.table = table that contains sample size, power for G2 and # power for M2. # # An example using the GSS is below # power.chisq <- function(G2,df.G2,M2,alpha,min.sample,max.sample,n.obs) { df.M2 <- 1 N <- seq(from=min.sample,to=max.sample,by=1) p.data <- gss$count/n.obs ( independence <- glm(count ~ mapaid + fechld,data=gss, family=poisson) ) p.G2 <- fitted(independence)/n.obs powerG2 <- matrix(-99,nrow=length(N),ncol=1) powerM2 <- matrix(-99,nrow=length(N),ncol=1) wG2 <- G2/n.obs wM2 <- M2/(n.obs-1) crit.G2 <- qchisq(1-alpha,df.G2) crit.M2 <- qchisq(1-alpha,df.M2) for (sample.size in 5:1000) { i <- sample.size-min.sample + 1 omegaG2 <- wG2*sample.size powerG2[i,1] <- 1 - pchisq(crit.G2,df.G2,omegaG2) omegaM2 <- wM2*sample.size powerM2[i,1] <- 1 - pchisq(crit.M2,df.M2,omegaM2) } plot(N,powerG2, type="l", col="blue", ylim=c(0,1), main="Power for General (G2) and Linear Association (M2) ", xlab="Sample Size", ylab="Power" ) lines(N,powerM2,type='l',col="red") legend("bottomright", legend=c("G2, General","M2, Linear"),col=c("blue","red"),lty=1) power.table <- cbind(N,powerG2,powerM2) return(power.table) } ################################################ # Examples: # ################################################ # Using GSS example df.G2 <- 12 G2 <- 47.522 M2 <- 36.261 alpha <- .05 min.sample <- 5 max.sample <- 1000 n.obs <- sum(gss$count) gss.eg2 <- power.chisq(G2=G2,M2=M2,alpha=.05,df.G2=12,min.sample=5,max.sample=1000,n.obs=n) head(gss.eg2,n=50) # Power can be greater if linear association is not present or very weak ( M2 <- 10 ) ( r <- sqrt( M2 /(n.obs-1)) ) eg2 <- power.chisq(G2=G2,M2=M2,alpha=.05,df.G2=12,min.sample=5,max.sample=1000,n.obs=n)