# how to mcmc sample with rjags in R

# note: the comment character in R is #

# (1) create a new directory; for illustration here, i'm going
#     to call it

#     nb10-jags

# (2) in a text file, write your model in the same way you would
#     for winbugs, except that the first line has to say

#     model {

#     instead of

#     {

#     and store this text file in the directory nb10-jags ; for
#     illustration here i'll call that file

#     nb10-model-1.txt

#     for clarity, this file looks like the following here
#     (without the # comment symbols, of course):

#     model {
#
#       mu ~ dnorm( 0.0, 1.0E-6 )
#       tau ~ dgamma( 0.001, 0.001 )
#       nu ~ dunif( 2.0, 12.0 )
#
#       for ( i in 1:n ) {
#
#         y[ i ] ~ dt( mu, tau, nu )
#
#       }
#
#       sigma <- 1.0 / sqrt( tau )
#
#       y.new ~ dt( mu, tau, nu )
#
#     }

# (3) open up an R session and change directory to nb10-jags

# (4) if this is your first time using rjags, you have to install
#     the rjags package and then load it; if you've already
#     installed it you just need to load it now (you have to be
#     connected to the internet for step (4a) to work)

# (4a) to install rjags, select

#      Install package(s)

#      from the packages pull-down menu in R; a window called
#      "CRAN mirror" will pop up asking you to choose a mirror
#      site from a list (i generally use USA (CA 1) or
#      USA (CA 2)); choose your mirror and click OK in the CRAN
#      mirror window; a new window called "Packages" will pop up;
#      scroll down, click on rjags and click OK in the Packages
#      window; R will mumble a few things and finish with a
#      remark like

#      The downloaded binary packages are in blah-blah

# (4b) to load rjags, select

#      Load package...

#      from the packages pull-down menu in R; a window called
#      "Select one" will pop up; click on rjags and click OK in
#      the Select one window; R will mumble a few things,
#      possibly finishing with some warning messages, which you
#      can ignore, saying that various things were built under a
#      different version of R than the one you're using; when
#      you get a > prompt at the end of the mumbling, rjags will
#      be loaded

# (5) your data file is the same as in winbugs except that you
#     assign it to an object in your current R session; for
#     illustration here it's assigned to nb10.data.1 with the
#     following command:

nb10.data.1 <- list( y = c( 409., 400., 406., 399., 402., 406.,
  401., 403., 401., 403., 398., 403., 407., 402., 401., 399.,
  400., 401., 405., 402., 408., 399., 399., 402., 399., 397.,
  407., 401., 399., 401., 403., 400., 410., 401., 407., 423.,
  406., 406., 402., 405., 405., 409., 399., 402., 407., 406.,
  413., 409., 404., 402., 404., 406., 407., 405., 411., 410.,
  410., 410., 401., 402., 404., 405., 392., 407., 406., 404.,
  403., 408., 404., 407., 412., 406., 409., 400., 408., 404.,
  401., 404., 408., 406., 408., 406., 401., 412., 393., 437.,
  418., 415., 404., 401., 401., 407., 412., 375., 409., 406.,
  398., 406., 403., 404. ), n = 100 )

# (6) your initial values file is the same as in winbugs except
#     that you assign it to an object in your current R session;
#     for illustration here it's assigned to nb10.inits.1 with
#     the following command:

nb10.inits.1 <- list( mu = 404.59, tau = 0.05, nu = 5.0,
  y.new = 400.0 )

# (7) now you do the model checking and compiling with the
#     following command (using the names i've chosen for
#     illustration above):

nb10.run.1 <- jags.model( file = "nb10-model-1.txt",
  data = nb10.data.1, inits = nb10.inits.1 )

# (8) before doing the random MCMC sampling, it's good form to
#     set the random number seed, so that other people running
#     your code get the same results you do:

set.seed( 65748392 )
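# two side notes on the setup above (these are my additions, not
# part of the original recipe):

# (i) if you prefer typing commands to pull-down menus, steps
#     (4a) and (4b) can also be done from the R prompt (the rjags
#     package talks to the separate JAGS program, which has to be
#     installed on your machine first):

#     install.packages( "rjags" )   # step (4a); needs internet
#     library( rjags )              # step (4b)

# (ii) set.seed controls R's own random number generator, but
#      JAGS keeps its own generator internally; if you want the
#      MCMC draws themselves to be exactly reproducible, you can
#      in addition pass a seed to JAGS through the initial-values
#      list in step (6), along these lines:

#      nb10.inits.1 <- list( mu = 404.59, tau = 0.05, nu = 5.0,
#        y.new = 400.0, .RNG.name = "base::Mersenne-Twister",
#        .RNG.seed = 65748392 )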
# (9) now you do a burn-in of (e.g.) 1000 iterations from your
#     initial values with the following commands:

n.burnin <- 1000

update( nb10.run.1, n.iter = n.burnin )

# (10) now you do a monitoring run of (e.g.) 100000 iterations
#      with the following command (on a reasonably fast machine
#      this will take about 90 seconds):

n.monitor <- 100000

nb10.run.1.results <- jags.samples( nb10.run.1,
  variable.names = c( "mu", "sigma", "nu", "y.new" ),
  n.iter = n.monitor )

# (11) now you get to summarize the posterior in a variety of
#      ways; here's some useful code to do so; first you extract
#      each of the monitored columns of the MCMC data set into
#      individual variables:

mu.star <- nb10.run.1.results$mu
sigma.star <- nb10.run.1.results$sigma
nu.star <- nb10.run.1.results$nu
y.new.star <- nb10.run.1.results$y.new

# then you can create graphical and numerical summaries of each
# monitored quantity, as follows:

##### summary for mu

# (11a) first you create what i call the MCMC 4-plot:

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, mu.star, type = 'l',
  xlab = 'Iteration Number', ylab = 'mu' )

# the upper left graph is a time-series plot of the monitored
# iterations (to visually check for stationarity)

plot( density( mu.star ), xlab = 'mu', main = '' )

# the upper right graph is a density trace of the marginal
# posterior distribution for the quantity being monitored

acf( as.ts( mu.star[ 1:n.monitor ] ), lag.max = 10, main = '' )

pacf( as.ts( mu.star[ 1:n.monitor ] ), lag.max = 10, main = '' )

# the lower left and right graphs are plots of the
# autocorrelation function (ACF) and the partial autocorrelation
# function (PACF) for the time series of monitored iterations;
# if these iterations behave like an autoregressive time series
# of order 1 with first-order autocorrelation rho (often denoted
# AR1( rho )), then (a) the PACF will have a single spike of
# height rho at lag 1 and no other significant spikes and (b)
# the ACF will exhibit a geometric decay pattern, with a spike
# of height rho at lag 1, a spike of approximate height rho^2 at
# lag 2, and so on -- for the parameter mu the ACF and PACF
# plots look very much like an AR1 with a first-order
# autocorrelation of about rho.1.hat = 0.21

# an MCMC time series with a rho.1.hat value close to 0 (which
# would be equivalent to IID sampling) is said to be "mixing
# well"; here we would say that this is true of the mu series

# (11b) then you can make numerical summaries of the marginal
#       posterior for the monitored quantity:

c( mean( mu.star ), sd( mu.star ),
  quantile( mu.star, probs = c( 0.025, 0.975 ) ) )

#                              2.5%     97.5%
# 404.29713   0.46744    403.38488 405.22240

print( rho.1.hat.mu.star <- cor( mu.star[ 1:( n.monitor - 1 ) ],
  mu.star[ 2:n.monitor ] ) )

# 0.2125716

# the reason for making the ACF and PACF plots is that if the
# time series of monitored iterations for a given quantity
# behaves like an AR1( rho ), then the Monte Carlo standard
# error (MCSE) of the mean of this series is given by the
# following expression:

print( se.mu.star.mean <- ( sd( mu.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.mu.star ) / ( 1 - rho.1.hat.mu.star ) ) )

# 0.001834315

# this is also the right order of magnitude for the MCSE of the
# MCMC estimates of the posterior SD and of the quantiles giving
# the 95% interval for the monitored quantity

# we would conclude, based on this model, that the true weight
# of NB10 was about 404.30 micrograms below the nominal weight
# of 10 grams, give or take about 0.47 micrograms, and that a
# 95% posterior interval for the true weight runs from about
# 403.38 to about 405.22 micrograms below 10 grams
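# the same 4-plot and numerical summaries get built below for
# sigma, nu and y.new; if you get tired of the repetition, you
# could wrap the recipe in a little helper function like the
# following (a convenience sketch of my own, not part of the
# original recipe; the name mcmc.4.plot.and.summary is just
# illustrative):

mcmc.4.plot.and.summary <- function( x.star, name ) {

  x.star <- as.numeric( x.star )
  n <- length( x.star )

  # the MCMC 4-plot, as in (11a):

  par( mfrow = c( 2, 2 ) )
  plot( 1:n, x.star, type = 'l', xlab = 'Iteration Number',
    ylab = name )
  plot( density( x.star ), xlab = name, main = '' )
  acf( x.star, lag.max = 10, main = '' )
  pacf( x.star, lag.max = 10, main = '' )

  # the numerical summaries, as in (11b):

  rho.1.hat <- cor( x.star[ 1:( n - 1 ) ], x.star[ 2:n ] )
  mcse <- ( sd( x.star ) / sqrt( n ) ) *
    sqrt( ( 1 + rho.1.hat ) / ( 1 - rho.1.hat ) )

  c( mean = mean( x.star ), sd = sd( x.star ),
    quantile( x.star, probs = c( 0.025, 0.975 ) ),
    rho.1.hat = rho.1.hat, mcse = mcse )

}

# for example, mcmc.4.plot.and.summary( mu.star, 'mu' ) should
# reproduce the mu results above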
##### summary for sigma

plot( 1:n.monitor, sigma.star, type = 'l',
  xlab = 'Iteration Number', ylab = 'sigma' )

plot( density( sigma.star ), xlab = 'sigma', main = '' )

acf( as.ts( sigma.star[ 1:n.monitor ] ), lag.max = 10,
  main = '' )

pacf( as.ts( sigma.star[ 1:n.monitor ] ), lag.max = 10,
  main = '' )

# the marginal posterior for sigma has a bit of a long
# right-hand tail, as you would expect for a scale parameter;
# the ACF and PACF plots show that the sigma time series behaves
# more or less like an AR1 with a first-order autocorrelation of
# about 0.45 (so the MCMC output for this parameter is not
# mixing quite as well as the output for mu, but 0.45 is still a
# fairly low autocorrelation)

c( mean( sigma.star ), sd( sigma.star ),
  quantile( sigma.star, probs = c( 0.025, 0.975 ) ) )

#                             2.5%     97.5%
# 3.8625312 0.4383744   3.0775740 4.7879916

print( rho.1.hat.sigma.star <- cor(
  sigma.star[ 1:( n.monitor - 1 ) ],
  sigma.star[ 2:n.monitor ] ) )

# 0.4480396

print( se.sigma.star.mean <- ( sd( sigma.star ) /
  sqrt( n.monitor ) ) * sqrt( ( 1 + rho.1.hat.sigma.star ) /
  ( 1 - rho.1.hat.sigma.star ) ) )

# 0.002245337

# we would conclude that the population value of the scale
# parameter in the t model is about 3.86 micrograms, give or
# take about 0.44 micrograms, and that a 95% posterior interval
# for this parameter runs from about 3.08 to about 4.79
# micrograms

##### summary for nu

plot( 1:n.monitor, nu.star, type = 'l',
  xlab = 'Iteration Number', ylab = 'nu' )

plot( density( nu.star ), xlab = 'nu', main = '' )

acf( as.ts( nu.star[ 1:n.monitor ] ), lag.max = 10, main = '' )

pacf( as.ts( nu.star[ 1:n.monitor ] ), lag.max = 10, main = '' )

# the marginal posterior for nu has a pronounced long right-hand
# tail; the ACF and PACF plots show that the nu time series
# behaves more or less like an AR1 with a first-order
# autocorrelation of almost 0.6 (this parameter is mixing the
# least well of the three, but 0.6 is still not too bad for a
# first-order autocorrelation)

c( mean( nu.star ), sd( nu.star ),
  quantile( nu.star, probs = c( 0.025, 0.975 ) ) )

#                           2.5%    97.5%
# 3.636421 1.170341    2.140174 6.567723

print( rho.1.hat.nu.star <- cor( nu.star[ 1:( n.monitor - 1 ) ],
  nu.star[ 2:n.monitor ] ) )

# 0.5729619

print( se.nu.star.mean <- ( sd( nu.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.nu.star ) / ( 1 - rho.1.hat.nu.star ) ) )

# 0.007102942

# notice the effect of the roughly 0.6 autocorrelation value:
# this MCSE, while still small, is about 3.5 times larger than
# the MCSEs for mu and sigma

# we would conclude that the population value of the degrees of
# freedom parameter in the t model is about 3.6, give or take
# about 1.2, and that a 95% posterior interval for this
# parameter runs from about 2.1 to about 6.6
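# a side note on the MCSE inflation factor (my arithmetic, not
# in the original recipe): multiplying sd / sqrt( n.monitor ) by
# sqrt( ( 1 + rho ) / ( 1 - rho ) ) is the same as pretending
# you have only n.monitor * ( 1 - rho ) / ( 1 + rho ) IID draws,
# a quantity often called the effective sample size; for the
# three parameters here:

100000 * ( 1 - 0.21 ) / ( 1 + 0.21 )   # mu:    about 65000
100000 * ( 1 - 0.45 ) / ( 1 + 0.45 )   # sigma: about 38000
100000 * ( 1 - 0.57 ) / ( 1 + 0.57 )   # nu:    about 27000

# so even the worst-mixing parameter (nu) still yields the
# equivalent of about 27000 IID draws from its marginal
# posterior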
##### summary for y.new

plot( 1:n.monitor, y.new.star, type = 'l',
  xlab = 'Iteration Number', ylab = 'y.new' )

plot( density( y.new.star ), xlab = 'y.new', main = '' )

acf( as.ts( y.new.star[ 1:n.monitor ] ), lag.max = 10,
  main = '' )

pacf( as.ts( y.new.star[ 1:n.monitor ] ), lag.max = 10,
  main = '' )

# the time series plot for y.new shows that the t model with
# small degrees of freedom can generate some simulated predicted
# data values that are quite far from the center of the
# distribution (in fact, farther away from the middle than the
# actual data values)

quantile( nb10.data.1$y, probs = c( 0.01, 0.05, 0.1, 0.25, 0.5,
  0.75, 0.9, 0.95, 0.99 ) )

#     1%     5%    10%    25%    50%    75%    90%    95%    99%
# 391.83 398.00 399.00 401.00 404.00 407.00 410.00 412.05 423.14

quantile( y.new.star, probs = c( 0.01, 0.05, 0.1, 0.25, 0.5,
  0.75, 0.9, 0.95, 0.99 ) )

#     1%     5%    10%    25%    50%    75%    90%    95%    99%
# 387.86 395.53 398.12 401.36 404.29 407.21 410.48 413.09 420.59

# you can see that the tails of the predictive distribution are
# a bit heavier than those of the actual data values

# this is a good way to informally check a Bayesian model:
# compare the predictive distribution it produces for future
# data with the actual data values

par( mfrow = c( 1, 1 ) )

qqplot( nb10.data.1$y, y.new.star, xlab = 'NB10 data',
  ylab = 'Predictive distribution from t model' )

# the fit of this plot is terrific except for a small number of
# extreme values in the predictive distribution that are far
# away from the actual extremes of the data; what to make of
# this fact is a bit unclear: on the one hand, it calls the t
# model slightly into question, but on the other hand it
# suggests that if the NBS people were to make more than 100
# weighings of NB10 they might get some readings even more
# extreme than the outliers they actually observed

c( mean( y.new.star ), sd( y.new.star ),
  quantile( y.new.star, probs = c( 0.025, 0.975 ) ) )

#                              2.5%      97.5%
# 404.276509   6.463505  392.551676 415.951182

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, y.new.star, type = 'l',
  xlab = 'Iteration Number', ylab = 'y.new' )

plot( density( y.new.star ), xlab = 'y.new', main = '' )

acf( as.ts( y.new.star[ 1:n.monitor ] ), lag.max = 10,
  main = '' )

pacf( as.ts( y.new.star[ 1:n.monitor ] ), lag.max = 10,
  main = '' )

# the ACF and PACF plots show that the MCMC draws from the
# predictive distribution behave like white noise, and this is
# in fact a general phenomenon in MCMC simulation from
# predictive distributions (the resulting draws are IID)

print( rho.1.hat.y.new.star <- cor(
  y.new.star[ 1:( n.monitor - 1 ) ],
  y.new.star[ 2:n.monitor ] ) )

# 0.006518032

print( se.y.new.star.mean <- ( sd( y.new.star ) /
  sqrt( n.monitor ) ) * sqrt( ( 1 + rho.1.hat.y.new.star ) /
  ( 1 - rho.1.hat.y.new.star ) ) )

# 0.02057306

# even though rho.1.hat.y.new.star is essentially 0, the MCSE
# for the mean of the posterior predictive distribution is about
# 3 times larger than that for nu (the worst-mixing parameter);
# this is because the SD of the posterior predictive
# distribution (6.46) is so much larger than the posterior SDs
# of the parameters (we noticed this in the Poisson
# length-of-stay case study: prediction of future data values is
# much noisier than parameter estimation)

# we would conclude that, according to the t model, future
# weighings of NB10 should produce values around 404.3
# micrograms below 10 grams, give or take about 6.5 micrograms,
# and that about 95% of those future data values should fall
# between about 392.6 and 416.0 micrograms below 10 grams

c( mean( nb10.data.1$y ), sd( nb10.data.1$y ) )

# 404.590000   6.466846

c( mean( y.new.star ), sd( y.new.star ) )

# 404.276509   6.463505

sum( ( 392.6 < nb10.data.1$y ) * ( nb10.data.1$y < 416.0 ) ) / 100

# 0.95
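# a small variation (my addition, not in the original recipe):
# you can avoid hard-coding the interval endpoints 392.6 and
# 416.0 by computing the coverage directly from the simulated
# predictive quantiles:

y.new.interval <- quantile( y.new.star,
  probs = c( 0.025, 0.975 ) )

mean( ( nb10.data.1$y > y.new.interval[ 1 ] ) &
  ( nb10.data.1$y < y.new.interval[ 2 ] ) )

# this should again come out at (or very close to) 0.95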
# you can see that the actual data mean and SD agree well with
# the mean and SD of the posterior predictive distribution, and
# that (as it happens) exactly 95% of the data values fall
# inside the 95% predictive interval for future data values;
# all in all, this adds up to the conclusion that the t model
# fits the NB10 data set well