# Multiple linear regression: Bayesian model averaging ----

# Example with the CRAN package BMA: Bayesian model averaging over the
# uncertainty about WHICH predictor variables to include.

# First you install the package BMA from CRAN
# (see "How to MCMC sample with R-JAGS" on the course web page
# for a reminder of how to do this); then you load BMA with

library( BMA )

# Bring in the attitude data, to compare with the BMS results

data( attitude )

summary( attitude )

# rating complaints privileges learning raises
# Min. :40.00 Min. :37.0 Min. :30.00 Min. :34.00 Min. :43.00
# 1st Qu.:58.75 1st Qu.:58.5 1st Qu.:45.00 1st Qu.:47.00 1st Qu.:58.25
# Median :65.50 Median :65.0 Median :51.50 Median :56.50 Median :63.50
# Mean :64.63 Mean :66.6 Mean :53.13 Mean :56.37 Mean :64.63
# 3rd Qu.:71.75 3rd Qu.:77.0 3rd Qu.:62.50 3rd Qu.:66.75 3rd Qu.:71.00
# Max. :85.00 Max. :90.0 Max. :83.00 Max. :75.00 Max. :88.00
# critical advance
# Min. :49.00 Min. :25.00
# 1st Qu.:69.25 1st Qu.:35.00
# Median :77.50 Median :41.00
# Mean :74.77 Mean :42.93
# 3rd Qu.:80.00 3rd Qu.:47.75
# Max. :92.00 Max. :72.00

# The outcome variable is rating; the other six variables are the
# candidate predictors.  Build the predictor matrix with with( )
# instead of attach( )/detach( ): attach( ) modifies the global search
# path and is a well-known source of hard-to-find bugs, while with( )
# evaluates the same expression locally and produces an identical
# matrix (no row names, columns named after the predictors).

predictors <- with( attitude,
  as.matrix( cbind( complaints, privileges, learning, raises,
    critical, advance ) ) )

head( predictors )

# complaints privileges learning raises critical advance
# [1,] 51 30 39 61 92 45
# [2,] 64 51 54 63 73 47
# [3,] 70 68 69 76 86 48
# [4,] 63 45 47 54 84 35
# [5,] 78 56 66 71 83 47
# [6,] 55 49 44 54 49 34

# bicreg( ) performs Bayesian model averaging for linear regression,
# using BIC to approximate the posterior model probabilities

bma.1 <- bicreg( predictors, attitude$rating )

summary( bma.1 )

# Call:
# bicreg(x = predictors, y = attitude$rating)
#
# 13 models were selected
# Best 5 models (cumulative posterior probability = 0.7052 ):
#
# p!=0 EV SD model 1 model 2 model 3
# Intercept 100.0 12.779552 8.04565 14.37632 9.87088 13.57774
# complaints 100.0 0.699832 0.12873 0.75461 0.64352 0.62273
# privileges 13.1 -0.009930 0.05426 . . .
# learning 47.7 0.117974 0.16150 . 0.21119 0.31239
# raises 11.7 0.005908 0.06659 . . .
# critical 10.9 0.001124 0.04510 . . .
# advance 20.8 -0.031284 0.09553 . . -0.18695
#
# nVar 1 2 3
# r2 0.681 0.708 0.726
# BIC -30.90490 -30.12970 -28.59037
# post prob 0.294 0.200 0.092
#
# model 4 model 5
# Intercept 11.98732 15.32762
# complaints 0.71276 0.78034
# privileges . -0.05016
# learning . .
# raises 0.08009 .
# critical . .
# advance . .
#
# nVar 2 2
# r2 0.684 0.683
# BIC -27.74851 -27.66889
# post prob 0.061 0.058

# Compare these results with those from BMS (similar but not identical)

bma.1$ols

# This produces frequentist (ordinary least squares: OLS) estimates for
# the regression coefficients in all 13 models examined by BMA

# (Intercept) complaints privileges learning raises critical
# [1,] 14.376319 0.7546098 0.00000000 0.0000000 0.00000000 0.0000000000
# [2,] 9.870880 0.6435176 0.00000000 0.2111918 0.00000000 0.0000000000
# [3,] 13.577741 0.6227297 0.00000000 0.3123870 0.00000000 0.0000000000
# [4,] 11.987317 0.7127601 0.00000000 0.0000000 0.08008548 0.0000000000
# [5,] 15.327620 0.7803434 -0.05015980 0.0000000 0.00000000 0.0000000000
# [6,] 15.560250 0.7611582 0.00000000 0.0000000 0.00000000 0.0000000000
# [7,] 14.251378 0.7543436 0.00000000 0.0000000 0.00000000 0.0019082025
# [8,] 11.258305 0.6824165 -0.10328426 0.2379762 0.00000000 0.0000000000
# [9,] 10.522603 0.6534882 0.00000000 0.2206857 -0.02863700 0.0000000000
# [10,] 9.813181 0.6433969 0.00000000 0.2111872 0.00000000 0.0008827253
# [11,] 14.303469 0.6533776 -0.07681738 0.3239497 0.00000000 0.0000000000
# [12,] 11.991676 0.5810749 0.00000000 0.2998723 0.10624157 0.0000000000
# [13,] 10.427976 0.6134758 0.00000000 0.3215508 0.00000000 0.0534772523
#
# advance
# [1,] 0.0000000
# [2,] 0.0000000
# [3,] -0.1869508
# [4,] 0.0000000
# [5,] 0.0000000
# [6,] -0.0377341
# [7,] 0.0000000
# [8,] 0.0000000
# [9,] 0.0000000
# [10,] 0.0000000
# [11,] -0.1715098
# [12,] -0.2289009
# [13,] -0.2043912

bma.1$se

# This produces standard errors for the OLS estimates above

# (Intercept) complaints privileges learning raises critical advance
# [1,] 6.619986 0.09753289 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
# [2,] 7.061224 0.11847743 0.0000000 0.1344037 0.0000000 0.0000000 0.0000000
# [3,] 7.543900 0.11814642 0.0000000 0.1541997 0.0000000 0.0000000 0.1448537
# [4,] 8.422567 0.13311968 0.0000000 0.0000000 0.1704740 0.0000000 0.0000000
# [5,] 7.160234 0.11938763 0.1299192 0.0000000 0.0000000 0.0000000 0.0000000
# [6,] 7.898448 0.10177168 0.0000000 0.0000000 0.0000000 0.0000000 0.1317041
# [7,] 11.172337 0.10111981 0.0000000 0.0000000 0.0000000 0.1360686 0.0000000
# [8,] 7.318340 0.12884452 0.1293454 0.1394103 0.0000000 0.0000000 0.0000000
# [9,] 8.304809 0.13637497 0.0000000 0.1496661 0.1824470 0.0000000 0.0000000
# [10,] 11.271531 0.12209153 0.0000000 0.1369656 0.0000000 0.1327267 0.0000000
# [11,] 7.739565 0.13051133 0.1305877 0.1574085 0.0000000 0.0000000 0.1490403
# [12,] 8.241055 0.14428097 0.0000000 0.1582662 0.2049087 0.0000000 0.1677381
# [13,] 11.119026 0.12242530 0.0000000 0.1585141 0.0000000 0.1366781 0.1538700

# The next two commands produce posterior means and SDs for the regression
# coefficients, averaged over the models with high posterior probability

bma.1$postmean

# (Intercept) complaints privileges learning raises critical
# 12.779551576 0.699832029 -0.009930483 0.117974194 0.005907684 0.001124475
# advance
# -0.031283625

bma.1$postsd

# 8.04565208 0.12872599 0.05426068 0.16150090 0.06659039 0.04510371 0.09553194

bma.1$which

# This summarizes which variables are in each of the 13 models examined
# by BMA

# complaints privileges learning raises critical advance
# [1,] TRUE FALSE FALSE FALSE FALSE FALSE
# [2,] TRUE FALSE TRUE FALSE FALSE FALSE
# [3,] TRUE FALSE TRUE FALSE FALSE TRUE
# [4,] TRUE FALSE FALSE TRUE FALSE FALSE
# [5,] TRUE TRUE FALSE FALSE FALSE FALSE
# [6,] TRUE FALSE FALSE FALSE FALSE TRUE
# [7,] TRUE FALSE FALSE FALSE TRUE FALSE
# [8,] TRUE TRUE TRUE FALSE FALSE FALSE
# [9,] TRUE FALSE TRUE TRUE FALSE FALSE
# [10,] TRUE FALSE TRUE FALSE TRUE FALSE
# [11,] TRUE TRUE TRUE FALSE FALSE TRUE
# [12,] TRUE FALSE TRUE TRUE FALSE TRUE
# [13,] TRUE FALSE TRUE FALSE TRUE TRUE

plot( bma.1, mfrow = c( 3, 3 ) )

# This shows the marginal posterior distributions for each coefficient;
# vertical spikes indicate the posterior probability that the variable
# is not predictively useful

# The function bic.glm in BMA can also do Bayesian model averaging
# over the uncertainty about which predictors to include in
# the class of generalized linear models, which includes things like
# logistic regression with dichotomous outcomes (i'll give an
# example of this next week)

# Other CRAN packages that do Bayesian model averaging include
# BAS and ensembleBMA