##### R code to use IID sampling to simulate the posterior mean
##### in the AMI mortality example (Lecture Notes, Day 1, Part 3a)

beta.sim <- function( m, alpha, beta, n.sim, seed ) {

  set.seed( seed )

  theta.out <- matrix( 0, n.sim, 2 )

  for ( i in 1:n.sim ) {

    theta.sample <- rbeta( m, alpha, beta )

    theta.out[ i, 1 ] <- mean( theta.sample )

    theta.out[ i, 2 ] <- sqrt( var( theta.sample ) / m )

  }

  return( theta.out ) 

}

##### This function simulates, n.sim times, the process of taking an
##### IID sample of size m from the Beta( alpha, beta ) distribution 
##### and calculating theta.bar and se.theta.bar

m <- 100

alpha <- 76.5

beta <- 353.5

n.sim <- 500

seed <- 9626954

theta.out <- beta.sim( m, alpha, beta, n.sim, seed )

theta.out[ 1:5, ]

# 0.1805196 0.001963036
# 0.1773793 0.001971002
# 0.1754717 0.001816860
# 0.1791012 0.001943177
# 0.1794574 0.001871565

print( truth <- alpha / ( alpha + beta ) )

# 0.1779070

##### The theta.bar values fluctuate around the truth (0.178) with a
##### give-or-take of about 0.0018, which agrees well with the
##### theoretical SE sigma / sqrt( m ) = 0.0184 / sqrt( 100 ) =. 0.00184
##### (the SD value 0.0184 comes from page 47 in Part 2)

theta.bar <- theta.out[ , 1 ]

qqnorm( ( theta.bar - mean( theta.bar ) ) / sd( theta.bar ),
  xlab = "Quantiles of Standard Normal", main = "", pch = 20 )

abline( 0, 1 )

##### Each of the theta.bar values is the mean of m = 100 IID draws,
##### so (by the Central Limit Theorem) the (frequentist) distribution 
##### of the random variable theta.bar should be closely approximated 
##### by a Gaussian, and you can see from the qqplot that this is true

theta.bar.SE <- theta.out[ , 2 ]

sum( ( theta.bar - qnorm( 0.975 ) * theta.bar.SE < truth ) *
  ( truth < theta.bar + qnorm( 0.975 ) * theta.bar.SE ) ) / n.sim

# 0.954

##### With this set of pseudo-random numbers, 95.4% of the
##### nominal 95\% Monte Carlo confidence intervals for the posterior
##### mean included the truth

##### a single sample of m = 100 IID draws would probably not have
##### a Monte Carlo SE (MCSE) that's small enough for careful work,
##### but how much bigger should m be?

print( theta.bar <- beta.sim( m, alpha, beta, 1, seed ) )

# 0.1805196 0.001963036

##### if you want the MCSE to be epsilon.1 = 0.00005 (say), so that
##### the posterior mean is pinned down pretty well to 4 significant
##### figures, then (as noted in Day 1, Part 3a) you need
##### m = sigma^2 / epsilon^2 IID draws from the posterior, where
##### sigma is the posterior sd for theta; from the MCSE equation
##### 0.001963036 = sigma.hat / sqrt( m ) = sigma.hat / sqrt( 100 ),
##### sigma can be estimated as about 0.001963036 * sqrt( 100 ) 
##### from the initial m = 100 draws above, and this is about 0.01963

epsilon.1 <- 0.00005

sigma.hat <- 0.001963036 * sqrt( 100 )

print( m.target.1 <- sigma.hat^2 / epsilon.1^2 )

# 154140.4

##### so, according to this initial sample of size m = 100, about
##### 154,200 IID draws would be needed to get the MCSE down to
##### about 0.00005; this sounds like a lot of draws, but computers
##### are fast these days:

print( theta.bar <- beta.sim( m.target.1, alpha, beta, 1, seed ) )

# 0.1778792 4.689692e-05

# the answer, 0.1779 plus or minus about 0.000047, is almost
# instantaneous, and compares well with the truth (0.1779070)

##### if you want the estimated posterior mean to differ from the
##### truth by no more than epsilon.1 = 0.00005 with at least
##### 100 * ( 1 - epsilon.2 ) = 100 * ( 1 - 0.05 ) = 95% Monte Carlo
##### (frequentist) probability, then (as noted in Day 1, Part 3a)
##### you need to multiply m.target.1 by 
##### qnorm( 1 - epsilon.2 / 2 )^2, which is about 1.96^2 or about 4:

epsilon.2 <- 0.05

print( m.target.2 <- m.target.1 * qnorm( 1 - epsilon.2 / 2 )^2 )

# 592124

##### this again sounds like a lot of draws, but the results are
##### again pretty close to instantaneous:

print( theta.bar <- beta.sim( m.target.2, alpha, beta, 1, seed ) )

# 0.1778897 2.392525e-05