# CAL graduate admissions: 3-way table # Edps 589 # Fall 2021 # C.J.Anderson # # library(vcd) library(vcdExtra) library(MASS) library(dplyr) library(magrittr) library(DescTools) setwd("D:\\Dropbox\\edps 589\\3 Way") cal <- read.table("cal_data_graduate_admission.txt",header=TRUE) cal # For higher-way table, first is row, 2nd is column, and others layers cal.tab <- xtabs(count ~ gender + admit + depart,data=cal) ############# Basic summary statistics and table manipulation ############ # -- total number of applicants ( n.total <- sum(cal$count) ) # -- 2-way table of gender x admissions ( gender.admit <- xtabs(count ~ gender + admit,data=cal) ) prop.table(gender.admit) # cell percentages prop.table(gender.admit, 1) # row percentages prop.table(gender.admit, 2) # column percentages # -- For each deparmtent, the proportion admitted # Cross-classification first is row, 2nd is column, and others layers ( cal.tab <- xtabs(count ~ gender + admit + depart,data=cal) ) ( cal.tab2 <- xtabs(count ~ depart + admit + gender, data=cal) ) # -- Another format for 3 (higher) -way tabs structable(depart ~ admit + gender, data=cal.tab) # -- Number of applicants admitted to each department (i.e., collapses over gender) n.admit <- aggregate(count ~ admit + depart,data=cal,FUN=sum) # -- Proportion admitted to each deparmtent # Note: %>% is called multiple times to "chain" functions # together, which accomplishes the same result as nesting. # This uses dplyr. # ( by.depart <- cal %>% group_by(depart,admit) %>% summarise (count=sum(count)) %>% mutate(rel.freq = count / sum(count)) ) ( p.admit.by.depart<- by.depart[seq(2,12,2),] ) ################## Some Tests #################### # Test of independence on margin: admit x gender # Compare output from the following loglm(count ~ gender + admit, data=gender.admit) # to get G2 and X2 ( LOR <- loddsratio(gender.admit) ) # compute Log(odds ratio) summary(LOR) # significant test OR <- oddsratio(gender.admit) # compute odds ratio summary(OR) # signficant test confint(OR) # confidnece interval of odds ratio confint(OR,level=.99) # Setting another level for confint CMHtest(gender.admit, strata=NULL, types=c("general") ) # test gender x admission in each department: CMHtest(cal.tab) # Association statistics for gender x admission in each department assocstats(cal.tab) # These all point to same conclusion OR <- oddsratio(cal.tab) summary(OR) confint(OR,level=.95) # Breslow-Day test of homogeneity BreslowDayTest(cal.tab, OR = NA, correct = FALSE) # Test of homogeneity: Woolf test (I class I use Breslowday) WoolfTest(cal.tab) # A coulple of figures of 3-way table sieve(count ~ gender + admit |depart, data=cal.tab,shade=TRUE) cotabplot(cal.tab,cond="depart",panel=cotab_sieve,shade=TRUE, labeling=labeling_values, gp_text=gpar(fontface="bold") ##### Drop department A and repeat some of the above cal.sub <- subset(cal,depart!="A") cal.tab3 <- xtabs(count~gender + admit + depart,data=cal.sub) CMHtest(cal.tab3) assocstats(cal.tab3) OR <- oddsratio(cal.tab3) summary(OR) confint(OR,level=.95) # So, what can you say about data now? # Homogeneous association...(more complex than needed) # These both seem to work WoolfTest(cal.tab3) woolf_test(cal.tab3)