# EdPsy/Stat/Psych 587 # Fall 2017 # Carolyn J. Anderson # cja@illinois.edu # # Last changed 9/1/2017 # # Function robust computed robust/sandwiche/empirical standard errors for # in 2 level linear mixed model (HLM) fixed effects # # version 7 # Sept 1, 2017 # # C.J. Anderson # cja@illinois.edu # ############################################################################### ############################################################################### # Defines function robust # # # # **********************IMPORTANT************************ # # For this to run correctly requires that # # o the random effects in the formula must be the first ones after "~" # # (at least until I get better are R) # # ******************************************************* # # # Input: # # o model0 is the results from lmer # # o the response variable # # o the id variable that defines clusters # # o dftype="residual" or "between/within" denominater degrees of freedom # # # # Output: A table with # # o estimtaed fixed effects # # o degrees of freedom (residual or between/within) # # o model based # - s.e. # # - t-statistic # # - p-value from Student's t-distribution # # o robust (empirical, sandwiche estimators) s.e. # # - s.e. # # - t-statistic # # - p-value from Student's t-distribution # # # # Notes: # # o Example usage included is below (at end of file) # # o This function computes between/within df differently than SAS when # # discrete variables are treated as.factor; however, when use dummy # # or other coding the df are the same # # o I need to work on between/within for more complex models---close but # # not quite right # # # # Future additions (requested): # # o 3 or more levels # # o cross-random effects # ############################################################################### ############################################################################### robust <- function(inmodel,response,idvar,dftype) { model0 <- inmodel y <- response id <- idvar ########################## # Set up # ########################## varcov <- as.data.frame(VarCorr(model0))[4] # extract variance covariance as vector q <- -.5 + sqrt(1/4 + 2*(nrow(varcov)-1)) # number of random effects nclusters <-length(unique(id)) # number of clusters X <- model.matrix(model0) # Extract design matrix for fixed effects n <- nrow(X) # total sample size p <- ncol(X) # number of fixed effects ncov <- q*(q-1)/2 # number of covariances ############################################################################ # This is general and works but perhaps not as efficient as could be # ############################################################################ if(q==1) { T <- varcov[1,1] Z <- X[,1] } else { Z <- X[, 1:q] T <- diag(varcov[1:q,]) ncov <- q*(q-1)/2 justcov <- varcov[(q+1):(q+ncov),] T1 <- matrix(,(q), (q)) T1[lower.tri(T1, diag=FALSE)] <- justcov T2 <- t(T1) T2[lower.tri(T2, diag=FALSE)] <- justcov T2 <- as.data.frame(T2) T2[is.na(T2)] <- 0 T <- T + T2 } T <- as.matrix(T) nj <- table(id) # number level 1 units per cluster csum <- cumsum(table(id)) # cumulative frequencies cut2 <- csum # end index cut1 <- c(0, csum) + 1 # start index cut1 <- cut1[1:(nclusters)] sigma2 <- varcov[(q+ncov+1),] # put within variance estimated into sigma2 gamma <- as.matrix(fixef(model0), nrow=q, ncol=1) # extract fixed effects and put into column vector yhat <- X %*% gamma # model based value of response variable) yvec <- as.matrix(y, n) # turn y into a matrix (vector actually) model.cov <- vcov(model0) # These are model based ses from lmer Robust.cov <- matrix(0,nrow=p, ncol=p) # loop throught to get robust.cov for (i in 1:nclusters){ # This is for getting model based covariance matrix Zj <- X[cut1[i]:cut2[i],1:q] # extract columns of X for group j *********** Zj <- as.matrix(Zj) I <-diag(nrow(Zj)) # create identity matirx of appropirate size Vj <- Zj %*% T %*% t(Zj) + sigma2*I # compute V_j Xj <- X[cut1[i]:cut2[i],] iVj <- solve(Vj) A <- model.cov %*% t(Xj) %*% iVj # This is for getting data based covaraiance matrix yj <- yvec[cut1[i]:cut2[i],1] # extract columns of y for group j yhatj <- yhat[cut1[i]:cut2[i],1] # extract columns of yhat for group j ssresj <- (yj-yhatj) %*% t(yj - yhatj) # compute sum sq res for group j Robust.cov <- Robust.cov + A %*% ssresj %*% t(A) } ################################################################# # Compute test statistics # ################################################################# model.se <-sqrt(diag(model.cov)) model.se model.t <- gamma/model.se robust.se <- sqrt(diag(Robust.cov)) robust.se robust.t <- gamma/robust.se ################################################################ # Compute chosen type of (denominator) df # # if (var(X[cut1[1]:cut2[1],i]) < .000000001 (i.e., zero) # ################################################################ rank.design <- rankMatrix(X) df.residual = n -rank.design[1] if (dftype=="residual"){ df <- df.residual } # for residual df if (dftype=="between/within"){ # for between/within df pbetween <- 0 # find number of between variables for (i in 1:p){ if (var(X[cut1[i]:cut2[i],i]) < .0000001) # if variance w/in=0, then it's a pbetween <- pbetween + 1 # between cluster variable } df <- matrix(, nrow = p, ncol = 1) # initalize matrix for (i in 1:p){ if (var(X[cut1[1]:cut2[1],i]) > 0.0000){ # checking to see if variance >0 tmp <- df.residual - nclusters + pbetween # then this is a within cluster fixed effect } else { tmp <- nclusters - pbetween # else this is a between cluster fixed effect } df[i] <- tmp } } ################################################ # Compute p-values # ################################################ p.valueR <- 2*(1-pt(abs(robust.t),df)) p.valueM <- 2*(1-pt(abs(model.t),df)) ################################################ # Output table of results # ################################################ fixed.table <- cbind(gamma, df, model.se, model.t, p.valueM, robust.se, robust.t, p.valueR) colnames(fixed.table) <- c('Fixed Est.', dftype, 'Model se.','Model t', 'p-value', 'Robust se', 'Robust t', 'p-value') return(fixed.table) } ########################################################################## ################ End function robust ##################################### ########################################################################## #################################################################################################### # example using the hsb data # #################################################################################################### ### First the neccessary stuff # Install and load lme4 and lattice library(lme4) library(lattice) ############################################################################ # Data set up: merge hsb1 and hsb2 # ############################################################################ # # Read in student level data hsb1 <- read.table(file="C:/Users/cja/Dropbox/edps587/R work/HSB1data.txt", header=TRUE) # Top & bottom of the file # head(hsb1) tail(hsb1) # # Read in school level data # hsb2 <- read.table(file="C:/Users/cja/Dropbox/edps587/R work/HSB2data.txt", header=TRUE) # Top & bottom of\ the file # head(hsb2) tail(hsb2) # # Merging student & school level files by school id ('id') # # # hsb <- merge(hsb1,hsb2, by=c('id')) head(hsb) hsb$cses <- hsb$ses-hsb$meanses # # Attach to make variables easier to name # attach(hsb) # Fit model and get robust se with tests using chosen type of degrees of freedom model.hsb1 <- lmer(mathach ~ 1 + cses + female + meanses + sector + minority + cses*female +( 1 + cses | id ), data=hsb, REML=FALSE) summary(model.hsb1) robust(model.hsb1,mathach,id,"residual") model.hsb2 <- lmer(mathach ~ 1 + cses + female + meanses + sector + minority + cses*female +( 1 + cses | id , data=hsb, REML=FALSE) summary(model.hsb2) robust(model.hsb2,mathach,id,"between/within") model.hsb3 <- lmer(mathach ~ 1 + cses + female + minority + meanses +( 1 +c ses+female | id), data=hsb, REML=FALSE) summary(model.hsb3) robust(model.hsb3,mathach,id,"between/within") sex <- as.factor(female) model.hsb4 <- lmer(mathach ~ 1 + cses + sex + cses*sex + meanses + sector +(1+ sex + cses | id), data=hsb, REML=FALSE) summary(model.hsb4) robust(model.hsb4,mathach,id,"between/within") model.hsb5 <- lmer(mathach ~ 1 + female + cses + meanses + sector + sector*female + cses*female + meanses*sector +(1+ female | id), data=hsb, REML=FALSE) summary(model.hsb5) robust(model.hsb5,mathach,id,"between/within")