# edps 589 # Fall 2018 # c.j.anderson # # Zeros & other things using same methodology: # # (1) Partial classification from panel study # (2) Structurally incomplete (teens and health concerns) # (3) Sampling zeros (Wickens hypothetical) # (4) Empathy example # (5) Anomalous cells (snow shoveling) # (6) Designing model to fit association: Holland code x gender # library(vcd) library(vcdExtra) ################################################################## # (1) Partial classification from panel study # ################################################################## vars <- expand.grid(year85=c("yes","no","not sampled"), year75=c("yes","no","not sampled")) counts <- c(175, 190, 230, 139, 1518, 982, 65, 595, -999) (panel <- cbind(vars,counts)) xtabs(counts ~ year75 + year85,data=panel) ################################################################# #(2) Structurally incomplete (teens and health concerns) # ################################################################# vars <- expand.grid(gender=c("Male","Female"), concern=c("Sex/Repdrduction","Menstrual problems","How healthy am I?","None") ) counts <- c(6, 16, 999, 12, 49, 29, 77, 102) (teens <- cbind(vars,counts)) xtabs(counts~concern+gender,data=teens) # Indicater variable for the (2,1) cell teens$I21 <- NA teens$I21[teens$counts>0] <- 0 teens$I21[teens$counts== 999] <- 1 summary(mod1 <- glm(counts~ gender + concern + I21, data=teens, family=poisson)) mode1$fitted ################################################################# #(3) Sampling zeros (Wickens hypothetical) # ################################################################# vars <- expand.grid(y=c(1,2,3,4), z=c(1,2,3), x=c(1,2)) counts <- c(5, 0, 7, 8, 9, 8, 3, 12, 6, 3, 5, 11, 10, 0, 6, 7, 8, 3, 0, 9, 0 ,2, 8, 11) (zero <- cbind(vars,counts)) # As 3-way table xtabs(counts~ x+y+z,data=zero) # As 1-way marinals xtabs(counts~x,data=zero) xtabs(counts~y,data=zero) xtabs(counts~z,data=zero) # The 2-way marginals --- getting lazy and not use "data=" xtabs(counts~x+y,zero) xtabs(counts~x+z,zero) xtabs(counts~z+y,zero) ## Homogeneous association zero$x <- as.factor(zero$x) zero$y <- as.factor(zero$y) zero$z <- as.factor(zero$z) summary( h <- glm(counts~x + y + z+ x*y + x*z + y*z,data=zero, family=poisson)) ################################################################# #(4) Empathy -- need to find the 5-way data from # ################################################################# vars <- expand.grid(concern=c("no","yes"), mention=c("no","yes"), manage=c("no","yes"), solve=c("no","yes")) counts <-c(38, 0, 3, 0, 51, 4, 16, 26, 0, 0, 0, 0, 2,1, 21, 17) (emp <- cbind(vars,counts)) xtabs(counts~ concern + mention + manage + solve,data=emp) ## Unrelated summary(unrel <- glm(counts ~ concern + mention + manage + solve +concern*mention + concern*manage + concern*solve + mention*manage + mention*solve + manage*solve, data=emp,family=poisson)) ################################################################# #((5) Anomalous cells (snow shoveling) # ################################################################# var <- expand.grid( gender=c("boy","both"), year=c("1953","1971"), religion=c("protestant","catholic","jewish","other")) counts <- c( 104, 42, 165, 142, 65, 44, 100, 130, 4,3,5,6, 13,6,32,23) (snow <- cbind(var,counts)) xtabs(counts~religion+gender+year, data=snow) ## Joint independence (RY,G) summary( ji <- glm(counts~gender + religion + year + religion*year, data=snow,family=poisson)) 1-pchisq(ji$deviance,ji$df.residual) (X2 <- sum( (snow$counts-ji$fitted)**2/ji$fitted)) 1-pchisq(X2,ji$df.residual) ## (RY,GY) summary( ci <- glm(counts~gender + gender*year + religion + year + religion*year , data=snow,family=poisson)) 1-pchisq(ci$deviance,ci$df.residual) (X2 <- sum( (snow$counts-ci$fitted)**2/ci$fitted)) 1-pchisq(X2,ci$df.residual) ## (RY,RG) summary( ci2 <- glm(counts~gender + gender*religion + religion + year + religion*year , data=snow,family=poisson)) 1-pchisq(ci2$deviance,ci2$df.residual) (X2 <- sum( (snow$counts-ci2$fitted)**2/ci2$fitted)) 1-pchisq(X2,ci2$df.residual) ## (RY,RG,YG) summary( h <- glm(counts~gender + gender*religion + gender*year + religion + year + religion*year , data=snow,family=poisson)) 1-pchisq(h$deviance,h$df.residual) (X2 <- sum( (snow$counts-h$fitted)**2/h$fitted)) 1-pchisq(X2,h$df.residual) snow$res <- residuals(ci,type="pearson") xtabs(res ~ religion + year + gender,data=snow) ## Put in indicators for catholic snow$I212 <- 0 snow$I212[6] <- 1 snow$I221 <- 0 snow$I221[7] <-1 snow$I222 <- 0 snow$I222[8] <-1 summary(ci.alt <- glm(counts~gender + gender*year +gender*religion + religion + year + religion*year +I212 + I221 + I222 , data=snow,family=poisson)) ###################################################################### # (6) Designing model to fit association: Holland code x gender # ###################################################################### vars <- expand.grid(gender=c("Male","Female"), code=c("Realistic","Investigative","Artistic","Social","Enterprising","Conventional")) counts <- c(13,1, 31,24,2,2,1,24,2,1,3,9) (holland <- cbind(vars,counts)) (hol.tab <- xtabs(counts~code+gender,data=holland)) ## independent? chisq.test(hol.tab) ## This didn't tell use why or where association lies, solve summary( hol.ind <- glm(counts~code+gender,data=holland,family=poisson)) holland$hi.res <- rstandard(hol.ind,type="pearson") xtabs(hi.res~code+gender,data=holland) ## Indicators for realistic and social holland$iReal <- 0 holland$iReal[1:2] <- 1 holland$iSocial<-0 holland$iSocial[7:8] <- 1 ## w/ some interaction summary(qi <- glm(counts~code+gender+ gender*iReal + gender*iSocial,data=holland,family=poisson)) 1-pchisq(qi$deviance,qi$df.residual) ## Indicators of association holland$I <- 0 holland$I[1:2] <- -1 holland$I[7:8] <- 1 holland$res.final <- rstandard(assoc,type="pearson") xtabs(res.final~code+gender,data=holland)