# Edps 590BAY # Fall 2019 # C.J.Anderson # # Normal with y's variance given and estimate the mean # Data: anorexia # o histograms with normal overlays # o take a sub-set for analysis # o find analytic results and plot prior and posterior # o use hold backs and up-date results # o intervale estimation # o prediction # o model checking # o random draws from posterior # library(HDInterval) # Depending on which computer I am using: # Office #setwd("C:\\Users\\cja\\Documents\\Dropbox\\edps 590BAY\\Lectures\\3 Normal") # home setwd("C:\\Users\\cja\\Dropbox\\edps 590BAY\\Lectures\\3 Normal") # laptop #setwd("D:\\Dropbox\\edps 590BAY\\Lectures\\3 Normal") anorex <- read.table("anorexia_data.txt",header=TRUE) # create change anorex$change <- anorex$weight2 - anorex$weight1 # Look at data --- nice graph for lecture notes: (ybar1 <- mean(anorex$weight1) ) (ybar2 <- mean(anorex$weight2) ) (ybar <- mean(anorex$change) ) (std1 <- sd(anorex$weight1) ) (std2 <- sd(anorex$weight2) ) (std <- sd(anorex$change) ) layout(matrix(c(1,1,0,0,1,1,3,3,2,2,3,3,2,2,0,0),4,4,byrow=FALSE)) x <- seq(from=70, to=95, by=2.5) hist(anorex$weight1,main="Weight before Treatement",freq=FALSE) curve(dnorm(x,ybar1,std1), add=TRUE, col="blue", lwd=2) x <- seq(from=70, to=105, by=5) hist(anorex$weight2,main="Weight after Treatement",freq=FALSE) curve(dnorm(x,ybar2,std2), add=TRUE,col='blue',lwd=2) x <- seq(from=-10, to=25,by=2) hist(anorex$change, main="Change in Weight", freq=FALSE) curve(dnorm(x,ybar,std), add=TRUE,col="blue",lwd=2) ################################################ # Use about 80% of data to train-- so can show # impact of adding data ################################################ smp_size <- floor(0.80 * nrow(anorex)) ## set the seed to make your partition reproductible set.seed(9856) train_ind <- sample(seq_len(nrow(anorex)), size = smp_size) train <- anorex[train_ind, ] test <- anorex[-train_ind, ] dim(train) dim(test) ######################### # Not informative Prior # ######################### mu0 <- 0 # Prior mean tau02<-1000 # Prior variance of mean # # Use the data to get sample statistics # n <- nrow(train) ybar <- mean(train$change) sigma2 = 8**2 # assume that I know it ~var(full data set) # # posterior mean # (mu.n <- ((1/tau02)*mu0 + (n/sigma2)*ybar)/( 1/tau02 + n/sigma2)) # # posterior variance # (tau2.n <- 1/(1/tau02 + n/sigma2)) # # plot prior and posterior # train <- train[order(train$change),] # don't need this x <- seq(from=0,to=6,length.out=100) # need values some values post.den <- dnorm(x,mu.n,sqrt(tau2.n)) # density of posterior prio.den <- dnorm(x,mu0,sqrt(tau02)) # density of prior par(mfrow=c(1,1)) plot(x,post.den,type='l',col="blue",lwd=2,xlab="Change in Weight",xlim=c(0,6),cex=1.5, yaxt='n', ylab="Density ", main="Prior N(0,1000) & Posterior N(2.6830,1.1215)" ) lines(x,prio.den,type='l',col='grey',lwd=2) legend("topright", legend = c("Posterior", "Prior"), col = c("blue","grey"), text.col = c("blue","grey"), lty=c(1,1), bty = "y", cex = 1.2) ################################################## # Up-date by using "test" as our prior ################################################## ################################################ # Set the prior to posterior mu0 <- mu.n # New prior mean tau02<-tau2.n # New prior variance # Use the data n <- nrow(test) ybar <- mean(test$change) sigma2 = 8**2 # assume that I know it ~ var(full data set) # posterior mean (mu.n <- ((1/tau02)*mu0 + (n/sigma2)*ybar)/(1/tau02 + n/sigma2)) # posterior variance (tau2.n <- 1/(1/tau02 + n/sigma2)) # plot prior and posterior x <- seq(from=0, to=6, length.out=1000) post.den2 <- dnorm(x,mu.n,sqrt(tau2.n)) prio.den2 <- dnorm(x,mu0,sqrt(tau02)) xtmp <- seq(from=0, to=6, length.out=100) # Plot and show progression par(mfrow=c(1,1)) plot(x,post.den2,type='l',col="red",lwd=2,xlab="Change in Weight",yaxt='n', ylab="Density ", main="Up-dated Posterior" ) lines(x,prio.den2,type='l',col='blue',lwd=2) lines(xtmp,prio.den,type='l',col='grey',lwd=2) legend("topright", legend = c("N(2.7614,0.8881)", "N(2.6830,1.1215)", "N(0,1000)"), col = c("red","blue","grey"), text.col = c("red","blue","grey"), lty=c(1,1), bty = "y", cex = 1.0) ############################################### # Credible interval ############################################### theta.95 <- c(.025,.975) # to set 95% intervals (cred.int <- qnorm(theta.95,mu.n,sqrt(tau2.n))) ############################################### # High density ############################################### (hdi.mu <- hdi(qnorm(theta.95,mu.n,sqrt(tau2.n)),credMass=.95)) ############################################### # Posterior with interval...add to plot ############################################### ycred <- c(0,.15) cred.low <-rep(cred.int[1],2) cred.hi <-rep(cred.int[2],2) lines(cred.low,ycred) lines(cred.hi,ycred) ############################################### # Model checking # ############################################### # Look at data --- nice graph for lecture notes: ybar <- mean(anorex$change) std <- sd(anorex$change) # Simulate data so reflects uncertainity in theta (tau2.n) and data (sigma2) ry <- rnorm(100, mu.n,sqrt(tau2.n + sigma2)) rbar <- mean(ry) rsd <- sd(ry) par(mfrow=c(1,1)) hist(anorex$change,main="Data and Random Samples from Posterior Overlaid",freq=FALSE,breaks=20,xlab="Change in Weight") curve(dnorm(x,ybar,std), add=TRUE, col="blue", lwd=2) legend("topright", legend = c("Data","Random"), col = c("black","blue"), text.col = c("black","blue"), lty=c(1,1), bty = "y", cex = 1.0)