# # Gender x Pre-matrial sex x Extra marital sex x Marital status # 1 = yes # 0 = no # # 1 = female # 0 = male # #setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 589\\7Model_building") setwd("D:\\Dropbox\\edps 589\\7Model_building") table <- read.table("married_gender_sex_data.txt", header=T) # Complete independence mod0 <- glm(count ~ female + pre + extra + married, data=table,family=poisson) summary(mod0) # All 2-way interaction mod2 <-glm(count ~ female + pre + extra + married + female*pre + female*extra + female*married + pre*extra + pre*married + extra*married, data=table,family=poisson) summary(mod2) # All 3-way interaction mod3 <-glm(count ~ female + pre + extra + married + female*pre + female*extra + female*married + pre*extra + pre*married + extra*married + female*pre*extra + female*pre*married + pre*extra*married + female*extra*married, data=table,family=poisson) summary(mod3) ######### input to glmnet ################# table$FP <- table$female*table$pre table$FE <- table$female*table$extra table$FM <- table$female*table$married table$PE <- table$pre*table$extra table$PM <- table$pre*table$married table$EM <- table$extra*table$married table$FPE<- table$FP*table$extra table$FPM<- table$FP*table$married table$PEM<- table$PE*table$married table$FEM<- table$FE*table$married x <- as.matrix(table[,-5]) y <- table[,5] ############### Poisson log-linear ###################### par(mfrow=c(1,1)) lasso1 <- glmnet(x,y, alpha=1, family=c("poisson")) summary(lasso1) plot(lasso1,xvar="lambda", label=T) plot(lasso1,label=T) # extract some coeffecients coef.some <-coef(lasso1)[,10] # cross validation --- finding the best and using this cv.lasso1 <- cv.glmnet(x, y, family="poisson",alpha=1) plot(cv.lasso1) best.lambda <- cv.lasso1$lambda.min # extract coefficeints for particular value(s) of lambda) coef.lasso1 <- coef(lasso1,s=best.lambda) coef.lasso1 which.is.which <- seq(1:15) label.coef <- cbind(coef.lasso1,which.is.which) label.coef # compare to mle coef.mod3 <- as.matrix(coef(mod3)) compare <- cbind(coef.mod3, coef.lasso1) compare plot(lasso1, xvar = "dev", label = TRUE) ####################### logistic regression ######################### table <- read.table("married_gender_sex_data.txt", header=T) par(mfrow=c(2,2)) divorced <- table[c(1,3,5,7, 9,11,13,15),5] divorced <- as.data.frame(divorced) married <- table[c(2,4,6,8,10,12,14,16),] mydata <- cbind(married,divorced) mydata$total <- mydata$divorced+mydata$count logit1 <- glm(cbind(count,total) ~ female + pre + extra + female*pre + female*extra + pre*extra, data=mydata, family="binomial") summary(logit1) coeff.logit1 <- coef(logit1) x <- mydata[,1:3] x$FP <- x$female*x$pre x$FE <- x$female*x$extra x$PE <- x$pre*x$extra x <- as.matrix(x) y1 <- mydata[,5] y2 <- mydata[,7] y <- cbind(y1,y2) lasso2 <- glmnet(x,y,family="binomial", alpha=1) plot(lasso2,xvar="lambda", label=T) plot(lasso2,label=T) # cross validation --- finding the best and using this cv.lasso2 <- cv.glmnet(x, y, family="binomial",alpha=1) plot(cv.lasso2) best.lambda <- cv.lasso2$lambda.min log(best.lambda) # extract coefficeints for particular value(s) of lambda) coef.lasso2 <- coef(lasso2,s=best.lambda) # compare to mle coeff.logit1 <- as.matrix(coeff.logit1) compare <- cbind(coeff.logit1, coef.lasso2) compare plot(lasso2, xvar = "dev", label = TRUE)