# Edps 590BAY --------- post this one online ----------
# Fall 2019
# C.J.Anderson
#
# Using data from 1 school from nels to illustrate simple linear regression 
#   Graphing data
#   ols 
#   Gibbs sampling with y ~ normal
#   Gibbs sampling with y ~ t          "robust"
#   Model checking
#   Outlier
#   Bayesian p-values
#   Looking at posterior distribution
#
#
library(coda)
library(plotly)   # needed for "select" command...data manipuatio
libary(rjags)
library(runjags)


setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\7 Linear Regression")

#
# Read in data and look at it
#
nels <- read.table("nels_school_62821.txt",header=TRUE)
head(nels)
### Graph(s) of data
# Plot of data
plot(nels$homework,nels$math,type='p',
     main="One School from NELS",
     xlab="Time Spent Doing Homework",
     ylab="Math Scores",
     cex=1.5,
     pch=1)

#
# Simple linear regression & graph
#
# Get the means to add to plot

schMathHmwk <- aggregate(nels$math, list(nels$homework), mean)
lines(schMathHmwk$Group.1,schMathHmwk$x,type='p',pch=19,col="blue")

# Add in ols regression line
ols.lm <- lm(math~homework,data=nels)
summary(ols.lm)
coef(ols.lm)
abline(ols.lm,col="blue",lwd=2)
text(3,45,"y=59.21 + 1.09(Homework)",col="blue",cex=2)

densData <- dnorm(100,
df = as.data.frame(nels)
plot_ly(df, x = Y, y = X, z = Z, group = X, type = "scatter3d", mode = "lines")


####################### rjags ################################
library(rjags)
set.seed(75)

dataList <- list(y=nels$math,
                 x=nels$homework,
                 N=length(nels$math),
                 sdY = sd(nels$math)
                 )

nelsLR1 = "model {                                    
                 for (i in 1:N){
                  y[i] ~ dnorm(mu[i] , precision)
                  mu[i] <- b0 + b1*x[i]         
                 }
                 b0 ~ dnorm(0 , 1/(100*sdY^2) )   
                 b1 ~ dnorm(0 , 1/(100*sdY^2) )   
                 sigma ~ dunif( 1E-3, 1E+30 )
                 precision <- 1/sigma^2           
               } "

writeLines(nelsLR1, con="nelsLR1.txt")

b0Init = mean(nels$math)
b1Init = 0
sigmaInit = sd(nels$math)
initsList = list(b0=b0Init, b1=b1Init, sigma=sigmaInit )

jagsNelsLR1 <- jags.model(file="nelsLR1.txt",  # compiles and intializes model
                        data=dataList,
                        inits=initsList,
                        n.chains=4,
                        n.adapt=500)            # adaptive phase for 500 iterations

update (jagsNelsLR1, n.iter=500)               #  burn in of 500 iterations

# gets samples from all chains for 4000 iterations
Samples1 <- coda.samples(jagsNelsLR1, variable.names=c("b0","b1","sigma"), n.iter=4000)

# output summary information
summary(Samples1)
plot(Samples1)

autocorr.plot(Samples1,auto.layout=TRUE)
geweke.diag(Samples1,frac1=0.1,frac2=0.5)
effectiveSize(Samples1)
cumuplot(Samples1,probs=c(.25,.50,.75),lwd=c(1,2),lty=c(2,1),col=c("blue","red"))
gelman.diag(Samples1)
gelman.plot(Samples1)
HPDinterval(Samples1)

print(Samples1)


###################### runjags ########################
library(runjags)

# Needs inital values for each of the 4 chains:

initsList = list(list("b0"=mean(nels$math), "b1"=sd(nels$math), "sigma"=sd(nels$math)**2), 
                 list("b0"=rnorm(1,-2,4), "b1"=1  , "sigma"=2),
                 list("b0"=rnorm(1,0,1),  "b1"=8  ,"sigma"=5),
                 list("b0"=rnorm(1,2,2),  "b1"=.5 ,"sigma"=15 )   
                 )

out.runjags <- run.jags(model=nelsLR1, monitor=c("b0","b1","sigma","dic"),
                    data=dataList, n.chains=4, inits=initsList)

print(out.runjags)
par(ask=TRUE)

print(out.runjags)
par(ask=TRUE)
plot(out.runjags)
gelman.plot(out.runjags)


############################################################################
############ Robust linear regression: swap out normal and use t ###########
############################################################################

############## rjags
library(rjags)
set.seed(75)

dataList <- list(y=nels$math,
                 x=nels$homework,
                 N=length(nels$math),
                 sdY = sd(nels$math)
                 )

tMod1 = "model {for (i in 1:N){
                  y[i] ~ dt(mu[i] , precision,nu)
                  mu[i] <- b0 + b1*x[i]
                 }
                 b0 ~ dnorm(0 , 1/(100*sdY^2) )   
                 b1 ~ dnorm(0 , 1/(100*sdY^2) )   
                 sigma ~ dunif(0, 1E+10 ) 
                 precision <- 1/sigma^2
                 nuMinusOne  ~ dexp(1/29)  
                 nu <- nuMinusOne+1  
              } "

writeLines(tMod1, con="tMod1.txt")

initsList = list("b0"=mean(nels$math), "b1"=sd(nels$math), "sigma"=sd(nels$math)**2) 

jagsModelLm2 <- jags.model(file="tMod1.txt",     # compiles and intializes model
                        data=dataList,
                        inits=initsList,
                        n.chains=4,
                        n.adapt=1000)            # adaptive phase for 1000 iterations


update (jagsModelLm2, n.iter=1000)               #  burn in of 1000 iterations

# contains samples from all chains with 500 iterations
tSamples2 <- coda.samples(jagsModelLm2, variable.names=c("b0","b1","sigma","nu"), n.iter=2000)


# output summary information
summary(tSamples2)
plot(tSamples2)

autocorr.plot(tSamples2,auto.layout=TRUE)
geweke.diag(tSamples2,frac1=0.1,frac2=0.5)
effectiveSize(tSamples2)
gelman.diag(tSamples2)
cumuplot(tSamples2,probs=c(.25,.50,.75),lwd=c(1,2),lty=c(2,1),col=c("blue","red"))
gelman.plot(tSamples2)
HPDinterval(tSamples2)


#### runjags
library(runjags)

initsList = list(list("b0"=mean(nels$math), "b1"=sd(nels$math), "sigma"=sd(nels$math)**2), 
                 list("b0"=rnorm(1,-2,4), "b1"=1  , "sigma"=2),
                 list("b0"=rnorm(1,0,1),  "b1"=8  ,"sigma"=5),
                 list("b0"=rnorm(1,2,2),  "b1"=.5 ,"sigma"=15 )   
                 )

out.runjags <- run.jags(model=tMod1, monitor=c("b0","b1","sigma","nu","dic"),
                        data=dataList, n.chains=4, inits=initsList)

print(out.runjags)
plot(out.runjags)

autocorr.plot(out.runjags,auto.layout=TRUE)
geweke.diag(out.runjags,frac1=0.1,frac2=0.5)
effectiveSize(out.runjags)
gelman.diag(out.runjags)
cumuplot(out.runjags,probs=c(.25,.50,.75),lwd=c(1,2),lty=c(2,1),col=c("blue","red"))
gelman.plot(out.runjags)
HPDinterval(out.runjags)



#################### Basic checks #########################
# Compare these to parameters
mean(nels$math)                  
sd(nels$math)                   
min(nels$math)
max(nels$math)
summary(nels$math)

################# found an outlier ##################
par(mfrow=c(2,2))
hist(nels$math,main="Outlier on math")
plot(nels$homework,nels$math,main="NELS: Looks like an outlier") 
lines(1,min(nels$math),type='p',col="red", pch=20,cex=2)

## Try analysis without the outlier
nels2 <- subset(nels, math>min(nels$math))
min(nels2$math)

dataList <- list(y=nels2$math,
                 x=nels2$homework,
                 N=length(nels2$math),We 
                 sdY = sd(nels2$math)
                 )

ModelLm1x = "model {                                    
                     for (i in 1:N){
                       y[i] ~ dnorm(mu[i] , precision)
                       mu[i] <- b0 + b1*x[i]         
                  }
                     b0 ~ dnorm(0 , 1/(100*sdY^2) )   
                     b1 ~ dnorm(0 , 1/(100*sdY^2) )   
                     sigma ~ dunif( 1E-3, 1E+30 )
                     precision <- 1/sigma^2           
              } "

writeLines(ModelLm1x, con="ModelLm1x.txt")

b0Init = mean(nels2$math)
b1Init = 0
sigmaInit = sd(nels2$math)
initsList = list(b0=b0Init, b1=b1Init, sigma=sigmaInit )

jagsModelLm1x <- jags.model(file="ModelLm1x.txt",  # compiles and intializes model
                        data=dataList,
                        inits=initsList,
                        n.chains=4,
                        n.adapt=1000)            # adaptive phase for 500 iterations

update (jagsModelLm1x, n.iter=1000)               #  burn in of 1000 iterations

# gets samples from all chains for 2000 iterations
Samples1x <- coda.samples(jagsModelLm1x, variable.names=c("b0","b1","sigma"), n.iter=2000)

# output summary information
summary(Samples1x)
plot(Samples1x)


################## 200 draws from posterior distribution #################

n <- length(nels2$math)
replications <- 200
yrep <- matrix(99,nrow=n,ncol=replications)
for (s in 1:replications){
  b0 <- rnorm(1,60.131,sd=1.3714)
  b1 <- rnorm(1,0.89476,sd=0.36696)
#  b0 <- rnorm(1,60.131,sd=1)
#  b1 <- rnorm(1,0.89476,sd=1)des
 for (i in 1:n){
     yrep[i,s] = b0 + b1*nels2$homework[i] + rnorm(1,0,5.0667)
  }
 }
   
# descriptive statistics
yhats <- apply(yrep,2,"mean")
ymin  <- apply(yrep,2,"min")
ymax  <- apply(yrep,2,"max")
ysd   <- apply(yrep,2,"sd")

# Bayesian p-values
pmean <- mean(mean(nels2$math) < yhats)
pmin  <- mean(min(nels2$math) < ymin)
pmax  <- mean(max(nels2$math) < ymax)
psd   <- mean(sd(nels2$math) < ysd)


# Graphs of descriptive statistics from posterior predictive distribution 
  
par(mfrow=c(2,2))
hist(ymin,breaks=10,main=paste("Simulated N=200 Minimums", "\nBayesian P-value =", round(pmin, 2)))
lines(c(min(nels2$math),min(nels2$math)),c(0,40),col="blue",lwd=2)

hist(ymax,breaks=10,main=paste("Simulated  N=200 Maximums", "\nBayesian P-value =", round(pmax, 2)))
lines(c(max(nels2$math),max(nels2$math)),c(0,40),col="blue",lwd=2)

hist(yhats,breaks=10,main=paste("Simulated  N=200 Means", "\nBayesian P-value =", round(pmean, 2)))
lines(c(mean(nels2$math),mean(nels2$math)),c(0,40),col="blue",lwd=2)

hist(ysd,breaks=10,main=paste("Simulated  N=200 SDs", "\nBayesian P-value =", round(psd, 2)))
lines(c(sd(nels2$math),sd(nels2$math)),c(0,40),col="blue",lwd=2)


# Graphs of data and posterior predictive distribution

ypred <- apply(yrep,1,"mean")
par(mfrow=c(2,2))
hist(nels2$math,main="Data Distribution",breaks=10)
hist(ypred,main="Predicted Posterior Distribution",breaks=10)
plot(nels2$homework,nels2$math,main="Data: math x homework")
plot(nels2$homework, ypred,type='p',main='Predictions: math x homework',ylim=c(59,80))
schMathHmwk <- aggregate(nels2$math, list(nels2$homework), "mean")
lines(schMathHmwk$Group.1,schMathHmwk$x,type='p',pch=19,col="blue")

#############################################################################################################
###### Alternative way to get value from posterior predictive distribution: discrepancies ###################
## 
##
dataList <- list(y=nels2$math,
                 x=nels2$homework,
                 N=length(nels2$math),
                 sdY = sd(nels2$math)
                 )

ModelLm1x = "model {   
## Likelihood                                 
              for (i in 1:N){
                y[i] ~ dnorm(mu[i] , precision)
                mu[i] <- b0 + b1*x[i] 

                res[i] <- y[i] - mu[i]               # Data residuals
                emp.new[i] ~ dnorm(mu[i],precision)  # New Predicted y 
                res.new[i] <- emp.new[i] - mu[i]     # New Predicted residuals
        
                  }
## Prior
                b0 ~ dnorm(0 , 1/(100*sdY^2) )   
                b1 ~ dnorm(0 , 1/(100*sdY^2) )   
                sigma ~ dunif( 1E-3, 1E+30 )
                precision <- 1/sigma^2   
## Derived parameters     
                fit <- sum(res[])                    # sum data residuals this iterations
                fit.new <- sum(res.new[])            # sum predicted residuals 
                fit.mean <- mean(emp.new[])          # mean of posterior predictions
              } "

writeLines(ModelLm1x, con="ModelLm1x.txt")

b0Init = mean(nels2$math)
b1Init = 0
sigmaInit = sd(nels2$math)
initsList = list(b0=b0Init, b1=b1Init, sigma=sigmaInit )

jagsModelLm1x <- jags.model(file="ModelLm1x.txt", 
                        data=dataList,
                        inits=initsList,
                        n.chains=4,
                        n.adapt=1000)           

update (jagsModelLm1x, n.iter=1000)             

# gets samples from all chains for 2000 iterations
Samples1x <- coda.samples(jagsModelLm1x, variable.names=c("b0","b1","sigma","fit","fit.new","fit.mean","emp.new"), thin.iter=2000)

# output summary information
summary(Samples1x)
plot(Samples1x)


## Posterior predictive check of residuals
## Get the betas in a matrix
head(Samples1x)

ffit <- as.array(Samples1x)                                 # change from mcmc to array object                                  
b <- rbind(ffit[,,1],ffit[,,2],ffit[, , 3], ffit[, , 4])    # one from each chain
  
# put values into vectors or array:
b0 <- b[,1]
b1 <- b[,2]
pred <- b[,3:68]
fit <- b[,69]
fit.mean <- b[,70]
fit.new <- b[,71]
sigma <- b[,72]

mean(nels2$math)
mean(fit.mean)

# Bayesian pvalue for data vs predictions:
bpval <- mean(nels2$math < pred)
bpval