##### R code (1) to explore distributional shapes in the
##### Beta family of distributions on ( 0, 1 ),
##### (2) to look at the elicited prior distribution in the
##### AMI mortality case study (Lecture Notes, Day 1, Part 2),
##### (3) to compare prior, likelihood and posterior densities,
##### (4) to compare the Beta posterior with its Gaussian
##### approximation, and (5) to do sensitivity analysis
##### of the effect of the prior on the posterior

########## (1) various shapes in the Beta family

theta.for.beta.grid <- seq( 0.01, 0.99, length = 500 )

plot( theta.for.beta.grid, dbeta( theta.for.beta.grid, 0.5, 0.5 ),
  type = 'l', lty = 1, xlab = 'theta', ylab = 'Density',
  ylim = c( 0, 6 ) )

text( 0.1, 3, '( 0.5, 0.5 )' )

lines( theta.for.beta.grid, dbeta( theta.for.beta.grid, 1, 1 ),
  lty = 2 )

text( 0.8, 1.5, '( 1, 1 )' )

lines( theta.for.beta.grid, dbeta( theta.for.beta.grid, 2, 3 ),
  lty = 3 )

text( 0.3, 2.25, '( 2, 3 )' )

lines( theta.for.beta.grid, dbeta( theta.for.beta.grid, 30, 20 ),
  lty = 4 )

text( 0.75, 5, '( 30, 20 )' )

########## (2) the elicited Beta prior

theta.prior.grid <- seq( 0, 0.4, length = 500 )

plot( theta.prior.grid, dbeta( theta.prior.grid, 4.5, 25.5 ),
  type = 'l', lty = 1, xlab = 'theta', ylab = 'Density' )

########## (3) prior, likelihood and posterior plot

prior.likelihood.posterior.grid <- seq( 0, 0.4, length = 500 )

plot( prior.likelihood.posterior.grid,
  dbeta( prior.likelihood.posterior.grid, 4.5, 25.5 ),
  type = 'l', lty = 1, xlab = 'theta', ylab = 'Density',
  ylim = c( 0, 22.5 ) )

text( 0.1, 8, 'prior' )

lines( prior.likelihood.posterior.grid,
  dbeta( prior.likelihood.posterior.grid, 73, 329 ), lty = 2 )

text( 0.225, 17.5, 'likelihood' )

lines( prior.likelihood.posterior.grid,
  dbeta( prior.likelihood.posterior.grid, 76.5, 353.5 ), lty = 3 )

text( 0.125, 20, 'posterior' )

########## (4) Beta posterior and Gaussian approximation

theta.approximation.grid <- seq( 0.12, 0.24, length = 500 )

plot( theta.approximation.grid,
  dbeta( theta.approximation.grid, 76.5, 353.5 ),
  type = 'l', lty = 1, xlab = 'theta', ylab = 'Density',
  ylim = c( 0, 22.5 ) )

text( 0.155, 20, 'posterior' )

alpha <- 4.5
beta <- 25.5
n <- 400
s <- 72

print( alpha.star <- alpha + s )
# 76.5

print( beta.star <- beta + n - s )
# 353.5

print( posterior.mean <- alpha.star / ( alpha.star + beta.star ) )
# 0.1779070

print( posterior.sd <- sqrt( ( alpha.star / ( alpha.star + beta.star ) ) *
  ( beta.star / ( alpha.star + beta.star ) ) /
  ( alpha.star + beta.star + 1 ) ) )
# 0.01842122

##### alpha.star / ( alpha.star + beta.star ) is like p.hat
##### beta.star / ( alpha.star + beta.star ) is like ( 1 - p.hat )
##### ( alpha.star + beta.star + 1 ) is like the sample size n

##### so this posterior sd expression is like
##### sqrt( p.hat * ( 1 - p.hat ) / n ), the frequentist
##### standard error for a proportion estimate, except that
##### p.hat is the posterior mean rather than the MLE,
##### and ( alpha.star + beta.star + 1 ) is essentially
##### the posterior sample size (prior + likelihood) instead
##### of the data sample size

lines( theta.approximation.grid, dnorm( theta.approximation.grid,
  posterior.mean, posterior.sd ), lty = 2 )

text( 0.22, 17.5, 'Gaussian approximation' )

##### exact 95% posterior interval for theta with this prior:

c( qbeta( 0.025, alpha.star, beta.star ),
  qbeta( 0.975, alpha.star, beta.star ) )
# 0.1432590 0.2153918

##### Gaussian approximate 95% posterior interval:

c( posterior.mean - qnorm( 0.975 ) * posterior.sd,
  posterior.mean + qnorm( 0.975 ) * posterior.sd )
# 0.1418021 0.2140119
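##### a quick Monte Carlo check of the exact interval (a sketch
##### added here, not part of the original case-study code; 'n.sims'
##### and the seed are illustrative choices): draw directly from the
##### Beta( 76.5, 353.5 ) posterior and take empirical quantiles

set.seed( 1 )

n.sims <- 100000

theta.draws <- rbeta( n.sims, alpha.star, beta.star )

quantile( theta.draws, probs = c( 0.025, 0.975 ) )
# should closely reproduce 0.1432590 and 0.2153918 above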
########## (5) sensitivity analysis

theta.sensitivity.analysis.grid <- seq( 0.12, 0.24, length = 500 )

n <- 400
s <- 72

##### prior 1 is the elicited prior:

alpha.1 <- 4.5
beta.1 <- 25.5

print( alpha.star.1 <- alpha.1 + s )
# 76.5

print( beta.star.1 <- beta.1 + n - s )
# 353.5

plot( theta.sensitivity.analysis.grid,
  dbeta( theta.sensitivity.analysis.grid, alpha.star.1, beta.star.1 ),
  type = 'l', lty = 1, xlab = 'theta', ylab = 'Density',
  ylim = c( 0, 22.5 ), main = 'Posterior With Varying Priors' )

text( 0.155, 20, 'prior 1' )

##### prior 2 is a diffuse (uniform) prior:

alpha.2 <- 1
beta.2 <- 1

print( alpha.star.2 <- alpha.2 + s )
# 73

print( beta.star.2 <- beta.2 + n - s )
# 329

##### note that this posterior is the same as the likelihood
##### distribution

lines( theta.sensitivity.analysis.grid,
  dbeta( theta.sensitivity.analysis.grid, alpha.star.2, beta.star.2 ),
  lty = 2 )

text( 0.205, 19, 'prior 2' )

##### prior 3 is a bit more diffuse still:

alpha.3 <- 0.5
beta.3 <- 0.5

print( alpha.star.3 <- alpha.3 + s )
# 72.5

print( beta.star.3 <- beta.3 + n - s )
# 328.5

lines( theta.sensitivity.analysis.grid,
  dbeta( theta.sensitivity.analysis.grid, alpha.star.3, beta.star.3 ),
  lty = 3 )

text( 0.2075, 17, 'prior 3' )

##### now let's look at the posterior mean, SD and 95% interval
##### across the 3 priors

##### prior 1

print( alpha.star.1 <- alpha.1 + s )
# 76.5

print( beta.star.1 <- beta.1 + n - s )
# 353.5

print( posterior.mean.1 <- alpha.star.1 / ( alpha.star.1 + beta.star.1 ) )
# 0.1779070

print( posterior.sd.1 <- sqrt( ( alpha.star.1 / ( alpha.star.1 + beta.star.1 ) ) *
  ( beta.star.1 / ( alpha.star.1 + beta.star.1 ) ) /
  ( alpha.star.1 + beta.star.1 + 1 ) ) )
# 0.01842122

c( qbeta( 0.025, alpha.star.1, beta.star.1 ),
  qbeta( 0.975, alpha.star.1, beta.star.1 ) )
# 0.1432590 0.2153918

##### prior 2

print( alpha.star.2 <- alpha.2 + s )
# 73

print( beta.star.2 <- beta.2 + n - s )
# 329

print( posterior.mean.2 <- alpha.star.2 / ( alpha.star.2 + beta.star.2 ) )
# 0.1815920

print( posterior.sd.2 <- sqrt( ( alpha.star.2 / ( alpha.star.2 + beta.star.2 ) ) *
  ( beta.star.2 / ( alpha.star.2 + beta.star.2 ) ) /
  ( alpha.star.2 + beta.star.2 + 1 ) ) )
# 0.01920352

c( qbeta( 0.025, alpha.star.2, beta.star.2 ),
  qbeta( 0.975, alpha.star.2, beta.star.2 ) )
# 0.1454958 0.2206880

##### prior 3

print( alpha.star.3 <- alpha.3 + s )
# 72.5

print( beta.star.3 <- beta.3 + n - s )
# 328.5

print( posterior.mean.3 <- alpha.star.3 / ( alpha.star.3 + beta.star.3 ) )
# 0.180798

print( posterior.sd.3 <- sqrt( ( alpha.star.3 / ( alpha.star.3 + beta.star.3 ) ) *
  ( beta.star.3 / ( alpha.star.3 + beta.star.3 ) ) /
  ( alpha.star.3 + beta.star.3 + 1 ) ) )
# 0.01919461

c( qbeta( 0.025, alpha.star.3, beta.star.3 ),
  qbeta( 0.975, alpha.star.3, beta.star.3 ) )
# 0.1447270 0.2198837

##### ML analysis

print( theta.hat <- s / n )
# 0.18

print( theta.hat.se <- sqrt( theta.hat * ( 1 - theta.hat ) / n ) )
# 0.01920937

c( theta.hat - qnorm( 0.975 ) * theta.hat.se,
  theta.hat + qnorm( 0.975 ) * theta.hat.se )
# 0.1423503 0.2176497

##### ML analysis in this problem corresponds to Bayesian analysis
##### with an improper Beta( 0, 0 ) prior

##### comparison of results

#####                                          95% interval
#####   method           estimate    SD/SE    lower    upper
#####
#####   Bayes, prior 1    0.1779   0.01842   0.1433   0.2154
#####   Bayes, prior 2    0.1816   0.01920   0.1455   0.2207
#####   Bayes, prior 3    0.1808   0.01919   0.1447   0.2199
#####   ML                0.1800   0.01921   0.1424   0.2176

##### all methods essentially agree to two significant figures:
##### theta is about 0.18, give or take about 0.019, and a 95%
##### interval for theta runs from 0.14 to 0.22
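##### the table above could also be built programmatically instead
##### of repeating the same arithmetic three times; this is a sketch
##### (the helper name 'beta.posterior.summary' is made up here,
##### not part of the original case-study code)

beta.posterior.summary <- function( alpha, beta, n, s ) {

  # conjugate updating: Beta( alpha, beta ) prior plus s successes
  # in n trials gives a Beta( alpha + s, beta + n - s ) posterior

  alpha.star <- alpha + s
  beta.star <- beta + n - s

  posterior.mean <- alpha.star / ( alpha.star + beta.star )

  posterior.sd <- sqrt( posterior.mean * ( 1 - posterior.mean ) /
    ( alpha.star + beta.star + 1 ) )

  c( estimate = posterior.mean, SD = posterior.sd,
    lower = qbeta( 0.025, alpha.star, beta.star ),
    upper = qbeta( 0.975, alpha.star, beta.star ) )

}

rbind( prior.1 = beta.posterior.summary( 4.5, 25.5, 400, 72 ),
  prior.2 = beta.posterior.summary( 1, 1, 400, 72 ),
  prior.3 = beta.posterior.summary( 0.5, 0.5, 400, 72 ) )
# reproduces the Bayes rows of the comparison table above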
##### this agreement occurs for two reasons: (1) all the priors
##### are relatively diffuse, and (2) n is large relative to
##### the prior sample size (this is the Bernstein-von Mises
##### theorem in action)

##### later we'll see examples in which the Bayesian approach
##### produces better inferences than ML even with diffuse priors,
##### where 'better' means interval estimates whose actual
##### frequentist coverage is (far) closer to nominal
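##### a small numerical illustration of the Bernstein-von Mises
##### point (hypothetical data, not from the case study: the same
##### observed proportion with ten times the sample size)

n.big <- 4000
s.big <- 720

c( prior.1 = ( 4.5 + s.big ) / ( 4.5 + 25.5 + n.big ),
  prior.2 = ( 1 + s.big ) / ( 1 + 1 + n.big ),
  prior.3 = ( 0.5 + s.big ) / ( 0.5 + 0.5 + n.big ) )
# all three posterior means now agree with 0.18 to about three
# decimal places, so the choice of prior matters even less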