#
#  EdPsy/Stat/Psych 587
#  Fall 2016
#  Carolyn J. Anderson  
#
#####################################################################################
#####################################################################################
#    Function 'hlmRsq'           (good one)                                         #
#                                                                                   #
#    Rsq are computed by the code below equal the proportional reduction of         #
#      prediction errorvariance accounted for the level 1 and Level 2 Models.       #
#                                                                                   #
#    In population these will always be positive.  If they are negative, this       #
#      is indicates model miss-specfication.  If they get smaller as add variables, #
#      this also could be model miss-specification                                  #
#                                                                                   #
#    There were proposed by Snijders, TAB, Bosker, RJ (1994). "Modeled  variance    #
#      in  two-level  models." Sociological Methods & Research, 22(3), 342-363.     #
#                                                                                   #
#  Input (arguments):                                                               #
#    "dataset" to used when fitting the model                                       #
#    "model1"  is the model for which you want R1sq and R2sq                        #
#                                                                                   #
#  Requirements:                                                                    #
#    o variable identifying group/cluster should be "id"                            #
#    o first (set of) variable(s) in formula are the random ones                    #
#                                                                                   #
#####################################################################################
#####################################################################################

hlmRsq <- function(dataset,model1) {

X <- model.matrix(model1)                          # Extract design matrix for fixed effects
n <- nrow(X)                                       # total sample size
varcov <- as.data.frame(VarCorr(model1))[4]        # extract variance covariance of Random effects as vector
q <- -.5 + sqrt(1/4 + 2*(nrow(varcov)-1))          # number of random effects (solution to quadratic eq)
ncov <- q*(q-1)/2                                  # number of covariances 
justcov <- varcov[(q+1):(q+ncov),]                 # vector of just the covariances
nclusters <- length(unique(id))                    # number of clusters

T <- if(q==1) { 
   Z <- X[,1]
   zbar <- mean(Z)
   T <- varcov[1,1]
 } else{ 
   Z <- X[, 1:q]
   zbar <- colMeans(Z)
   T <- diag(varcov[1:q,])                       
   T1 <- matrix(,(q), (q))
   T1[lower.tri(T1, diag=FALSE)] <- justcov
   T2 <- t(T1)
   T2[lower.tri(T2, diag=FALSE)] <- justcov
   T2 <- as.data.frame(T2)
   T2[is.na(T2)] <- 0
   T <- T + T2
}
T <- as.matrix(T)                            # Known at Psi in my glmm book

# Set up for loop

nj <- table(id)                              # number level 1 units per cluster
csum <- cumsum(table(id))                    # cumulative frequencies
cut2 <- csum                                 # end index
cut1 <- c(0, csum) + 1                       # start index
cut1 <- cut1[1:(nclusters)]

nj <- table(id)                              # number level 1 units
Sw <- matrix(0, nrow=q, ncol=q)              # initial Sum of Square between
Sb <- matrix(0, nrow=q, ncol=q)              # initial sum of squares within

# loop throught to get Sb and Sw

for (i in 1:nclusters){         
  # This is for getting model based covariance matrix              
    Zj <- X[cut1[i]:cut2[i],1:q]        # extract columns of X for random design for group j
    Zj <- as.matrix(Zj)
    onej <- matrix(1, nrow=1, ncol=nj[i])
    zbarj <- (t(onej %*% Zj)/nj[i])
    zbar  <- matrix(zbar, nrow=q, ncol=1)
    Sb <- Sb + nj[i] * (zbarj - zbar) %*% t(zbarj - zbar) 

    zbarj <- t(matrix(zbarj, nrow= q, ncol=nj[i]))
    Sw <- Sw +  t(Zj - zbarj) %*% (Zj - zbarj)
}
Sb <- Sb/(n-1)
Sw <- Sw/(n-1)

sigma2 <- varcov[(q+ncov+1),]                # put within variance estimated into sigma2

#
# Fit the null model using same estimation method at model input
#

# Determines whether ML or REML was used to fit the model

sum <- summary(model1)
if (sum[1]=="Linear mixed model fit by maximum likelihood "){
   method="FALSE"
} else {
   method='TRUE'
}

# Fits the null model by updating formula from model input to functon

f <- formula(model1)
all <- all.vars(f)
null <- update(f, .~ 1 -all + (1 | id))
model0 <- lmer(null, data=dataset, REML=method)
varcov0 <- as.data.frame(VarCorr(model0))[4] 

####################################################################
#  R1sq    & R2sq                                                  #
####################################################################

numerator1 <- t(zbar) %*% T %*% zbar + sum(diag( (T %*% (Sb + Sw)) )) + sigma2
denominator1 <- sum(varcov0)
R1sq <- 1 -numerator1/denominator1

harmonic.mean <- nclusters/sum(1/nj)

numerator2 <- t(zbar) %*% T %*% zbar + sum(diag((T %*% (Sb + Sw/harmonic.mean)))) + sigma2/harmonic.mean
denominator2 <- sum(varcov0[1,1]) + varcov0[2,1]/harmonic.mean

R2sq <- 1 - numerator2/denominator2

Rsq.values <- cbind(harmonic.mean, R1sq,R2sq)
colnames(Rsq.values) <- c('harmonic.mean', 'R1sq', 'R2sq')
return(Rsq.values)
}