# # EdPsy/Stat/Psych 587 # Fall 2016 # Carolyn J. Anderson # ##################################################################################### ##################################################################################### # Function 'hlmRsq' (good one) # # # # Rsq are computed by the code below equal the proportional reduction of # # prediction errorvariance accounted for the level 1 and Level 2 Models. # # # # In population these will always be positive. If they are negative, this # # is indicates model miss-specfication. If they get smaller as add variables, # # this also could be model miss-specification # # # # There were proposed by Snijders, TAB, Bosker, RJ (1994). "Modeled variance # # in two-level models." Sociological Methods & Research, 22(3), 342-363. # # # # Input (arguments): # # "dataset" to used when fitting the model # # "model1" is the model for which you want R1sq and R2sq # # # # Requirements: # # o variable identifying group/cluster should be "id" # # o first (set of) variable(s) in formula are the random ones # # # ##################################################################################### ##################################################################################### hlmRsq <- function(dataset,model1) { X <- model.matrix(model1) # Extract design matrix for fixed effects n <- nrow(X) # total sample size varcov <- as.data.frame(VarCorr(model1))[4] # extract variance covariance of Random effects as vector q <- -.5 + sqrt(1/4 + 2*(nrow(varcov)-1)) # number of random effects (solution to quadratic eq) ncov <- q*(q-1)/2 # number of covariances justcov <- varcov[(q+1):(q+ncov),] # vector of just the covariances nclusters <- length(unique(id)) # number of clusters T <- if(q==1) { Z <- X[,1] zbar <- mean(Z) T <- varcov[1,1] } else{ Z <- X[, 1:q] zbar <- colMeans(Z) T <- diag(varcov[1:q,]) T1 <- matrix(,(q), (q)) T1[lower.tri(T1, diag=FALSE)] <- justcov T2 <- t(T1) T2[lower.tri(T2, diag=FALSE)] <- justcov T2 <- as.data.frame(T2) T2[is.na(T2)] <- 0 T <- T + T2 } T <- as.matrix(T) # Known at Psi in my glmm book # Set up for loop nj <- table(id) # number level 1 units per cluster csum <- cumsum(table(id)) # cumulative frequencies cut2 <- csum # end index cut1 <- c(0, csum) + 1 # start index cut1 <- cut1[1:(nclusters)] nj <- table(id) # number level 1 units Sw <- matrix(0, nrow=q, ncol=q) # initial Sum of Square between Sb <- matrix(0, nrow=q, ncol=q) # initial sum of squares within # loop throught to get Sb and Sw for (i in 1:nclusters){ # This is for getting model based covariance matrix Zj <- X[cut1[i]:cut2[i],1:q] # extract columns of X for random design for group j Zj <- as.matrix(Zj) onej <- matrix(1, nrow=1, ncol=nj[i]) zbarj <- (t(onej %*% Zj)/nj[i]) zbar <- matrix(zbar, nrow=q, ncol=1) Sb <- Sb + nj[i] * (zbarj - zbar) %*% t(zbarj - zbar) zbarj <- t(matrix(zbarj, nrow= q, ncol=nj[i])) Sw <- Sw + t(Zj - zbarj) %*% (Zj - zbarj) } Sb <- Sb/(n-1) Sw <- Sw/(n-1) sigma2 <- varcov[(q+ncov+1),] # put within variance estimated into sigma2 # # Fit the null model using same estimation method at model input # # Determines whether ML or REML was used to fit the model sum <- summary(model1) if (sum[1]=="Linear mixed model fit by maximum likelihood "){ method="FALSE" } else { method='TRUE' } # Fits the null model by updating formula from model input to functon f <- formula(model1) all <- all.vars(f) null <- update(f, .~ 1 -all + (1 | id)) model0 <- lmer(null, data=dataset, REML=method) varcov0 <- as.data.frame(VarCorr(model0))[4] #################################################################### # R1sq & R2sq # #################################################################### numerator1 <- t(zbar) %*% T %*% zbar + sum(diag( (T %*% (Sb + Sw)) )) + sigma2 denominator1 <- sum(varcov0) R1sq <- 1 -numerator1/denominator1 harmonic.mean <- nclusters/sum(1/nj) numerator2 <- t(zbar) %*% T %*% zbar + sum(diag((T %*% (Sb + Sw/harmonic.mean)))) + sigma2/harmonic.mean denominator2 <- sum(varcov0[1,1]) + varcov0[2,1]/harmonic.mean R2sq <- 1 - numerator2/denominator2 Rsq.values <- cbind(harmonic.mean, R1sq,R2sq) colnames(Rsq.values) <- c('harmonic.mean', 'R1sq', 'R2sq') return(Rsq.values) }