# Edps 590BAY
# Fall 2019
# C.J.Anderson
#
# Normal with y's variance given and estimate the mean
# Data: anorexia
#  o histograms with normal overlays
#  o take a sub-set for analysis
#  o find analytic results and plot prior and posterior
#  o use hold backs and up-date results 
#  o intervale estimation
#  o prediction
#  o model checking
#  o random draws from posterior
#
library(HDInterval)


# Depending on which computer I am using:
# Office
#setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 590BAY\\Lectures\\3 Normal")

# home
setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\3 Normal")

# laptop
#setwd("D:\\Dropbox\\edps 590BAY\\Lectures\\3 Normal")

anorex <- read.table("anorexia_data.txt",header=TRUE)

# create change 
anorex$change <- anorex$weight2 - anorex$weight1

# Look at data --- nice graph for lecture notes:
(ybar1 <- mean(anorex$weight1) )
(ybar2 <- mean(anorex$weight2) )
(ybar <- mean(anorex$change) )
(std1 <- sd(anorex$weight1) )
(std2 <- sd(anorex$weight2) )
(std <- sd(anorex$change) )

layout(matrix(c(1,1,0,0,1,1,3,3,2,2,3,3,2,2,0,0),4,4,byrow=FALSE))
x <- seq(from=70, to=95, by=2.5)
hist(anorex$weight1,main="Weight before Treatement",freq=FALSE)
curve(dnorm(x,ybar1,std1), add=TRUE, col="blue", lwd=2)

x <- seq(from=70, to=105, by=5) 
hist(anorex$weight2,main="Weight after Treatement",freq=FALSE)
curve(dnorm(x,ybar2,std2), add=TRUE,col='blue',lwd=2)

x <- seq(from=-10, to=25,by=2)
hist(anorex$change, main="Change in Weight", freq=FALSE)
curve(dnorm(x,ybar,std), add=TRUE,col="blue",lwd=2)

################################################
# Use about 80% of data to train-- so can show
#   impact of adding data
################################################
smp_size <- floor(0.80 * nrow(anorex))

## set the seed to make your partition reproductible
set.seed(9856)
train_ind <- sample(seq_len(nrow(anorex)), size = smp_size)

train <- anorex[train_ind, ]
test <- anorex[-train_ind, ]
dim(train)
dim(test)

#########################
# Not informative Prior #
#########################
mu0 <- 0          # Prior mean
tau02<-1000       # Prior variance of mean

#
# Use the data to get sample statistics
#
n <- nrow(train)
ybar <- mean(train$change)
sigma2 = 8**2     # assume that I know it ~var(full data set)

#
# posterior mean
#
(mu.n <- ((1/tau02)*mu0 + (n/sigma2)*ybar)/( 1/tau02  +  n/sigma2))

#
# posterior variance
#
(tau2.n <- 1/(1/tau02 + n/sigma2))


#
# plot prior and posterior
#
train <- train[order(train$change),]   # don't need this
x <- seq(from=0,to=6,length.out=100)   # need values  some values 
post.den <- dnorm(x,mu.n,sqrt(tau2.n)) # density of posterior
prio.den <- dnorm(x,mu0,sqrt(tau02))   # density of prior

par(mfrow=c(1,1))
plot(x,post.den,type='l',col="blue",lwd=2,xlab="Change in Weight",xlim=c(0,6),cex=1.5, yaxt='n',
   ylab="Density ", main="Prior N(0,1000) & Posterior N(2.6830,1.1215)" )
lines(x,prio.den,type='l',col='grey',lwd=2)
legend("topright",
  legend = c("Posterior", "Prior"),
  col = c("blue","grey"),
  text.col = c("blue","grey"), 
  lty=c(1,1),
  bty = "y",
  cex = 1.2)

##################################################
# Up-date by using "test" as our prior
##################################################
################################################
# Set the prior to posterior
mu0 <- mu.n           # New prior mean
tau02<-tau2.n         # New prior variance

# Use the data
n <- nrow(test)
ybar <- mean(test$change)
sigma2 = 8**2             # assume that I know it ~ var(full data set)

# posterior mean
(mu.n <- ((1/tau02)*mu0 + (n/sigma2)*ybar)/(1/tau02  +  n/sigma2))

# posterior variance
(tau2.n <- 1/(1/tau02 + n/sigma2))

# plot prior and posterior
x <- seq(from=0, to=6, length.out=1000)
post.den2 <- dnorm(x,mu.n,sqrt(tau2.n))
prio.den2 <- dnorm(x,mu0,sqrt(tau02))
xtmp <- seq(from=0, to=6, length.out=100)

# Plot and show progression
par(mfrow=c(1,1))
plot(x,post.den2,type='l',col="red",lwd=2,xlab="Change in Weight",yaxt='n',
   ylab="Density ", main="Up-dated Posterior" )
lines(x,prio.den2,type='l',col='blue',lwd=2)
lines(xtmp,prio.den,type='l',col='grey',lwd=2)
legend("topright",
  legend = c("N(2.7614,0.8881)", "N(2.6830,1.1215)", "N(0,1000)"),
  col = c("red","blue","grey"),
  text.col = c("red","blue","grey"), 
  lty=c(1,1),
  bty = "y",
  cex = 1.0)

###############################################
# Credible interval
###############################################
theta.95 <- c(.025,.975)                        # to set 95% intervals
(cred.int <- qnorm(theta.95,mu.n,sqrt(tau2.n)))

###############################################
# High density
###############################################
(hdi.mu <- hdi(qnorm(theta.95,mu.n,sqrt(tau2.n)),credMass=.95))

###############################################
# Posterior with interval...add to plot
###############################################
ycred <- c(0,.15)
cred.low <-rep(cred.int[1],2)
cred.hi  <-rep(cred.int[2],2)
lines(cred.low,ycred)
lines(cred.hi,ycred)

###############################################
# Model checking                              #
###############################################
# Look at data --- nice graph for lecture notes:
ybar <- mean(anorex$change)
std <- sd(anorex$change)

# Simulate data so reflects uncertainity in theta (tau2.n) and data (sigma2)
ry <- rnorm(100, mu.n,sqrt(tau2.n + sigma2))
rbar <- mean(ry)
rsd <- sd(ry)

par(mfrow=c(1,1))
hist(anorex$change,main="Data and Random Samples from Posterior Overlaid",freq=FALSE,breaks=20,xlab="Change in Weight")
curve(dnorm(x,ybar,std), add=TRUE, col="blue", lwd=2)
legend("topright",
  legend = c("Data","Random"),
  col = c("black","blue"),
  text.col = c("black","blue"), 
  lty=c(1,1),
  bty = "y",
  cex = 1.0)