# multiple linear regression example: Bayesian model averaging # example with the CRAN package BMS data( attitude ) help( attitude ) # From a survey of the clerical employees of a large financial # organization, the data are aggregated from the questionnaires # of the approximately 35 employees for each of 30 # (randomly selected) departments. The numbers give the # percent proportion of favourable responses to seven questions # in each department. # Format # A dataframe with 30 observations on 7 variables. # The first column contains the short names from the reference, # the second one the variable names in the data frame: # Y rating numeric Overall rating # X[1] complaints numeric Handling of employee complaints # X[2] privileges numeric Does not allow special privileges # X[3] learning numeric Opportunity to learn # X[4] raises numeric Raises based on performance # X[5] critical numeric Too critical # X[6] advance numeric Advancement attitude # rating complaints privileges learning raises critical advance # 1 43 51 30 39 61 92 45 # 2 63 64 51 54 63 73 47 # 3 71 70 68 69 76 86 48 # 4 61 63 45 47 54 84 35 # 5 81 78 56 66 71 83 47 # 6 43 55 49 44 54 49 34 # 7 58 67 42 56 66 68 35 # 8 71 75 50 55 70 66 41 # 9 72 82 72 67 71 83 31 # 10 67 61 45 47 62 80 41 # 11 64 53 53 58 58 67 34 # 12 67 60 47 39 59 74 41 # 13 69 62 57 42 55 63 25 # 14 68 83 83 45 59 77 35 # 15 77 77 54 72 79 77 46 # 16 81 90 50 72 60 54 36 # 17 74 85 64 69 79 79 63 # 18 65 60 65 75 55 80 60 # 19 65 70 46 57 75 85 46 # 20 50 58 68 54 64 78 52 # 21 50 40 33 34 43 64 33 # 22 64 61 52 62 66 80 41 # 23 53 66 52 50 63 80 37 # 24 40 37 42 58 50 57 49 # 25 63 54 42 48 66 75 33 # 26 66 77 66 63 88 76 72 # 27 78 75 58 74 80 78 49 # 28 48 57 44 45 51 83 38 # 29 85 85 71 71 77 74 55 # 30 82 82 39 59 64 78 39 require( stats ) require( graphics ) pairs( attitude, main = "attitude data" ) summary( attitude ) # rating complaints privileges learning raises # Min. :40.00 Min. :37.0 Min. :30.00 Min. :34.00 Min. :43.00 # 1st Qu.:58.75 1st Qu.:58.5 1st Qu.:45.00 1st Qu.:47.00 1st Qu.:58.25 # Median :65.50 Median :65.0 Median :51.50 Median :56.50 Median :63.50 # Mean :64.63 Mean :66.6 Mean :53.13 Mean :56.37 Mean :64.63 # 3rd Qu.:71.75 3rd Qu.:77.0 3rd Qu.:62.50 3rd Qu.:66.75 3rd Qu.:71.00 # Max. :85.00 Max. :90.0 Max. :83.00 Max. :75.00 Max. :88.00 # critical advance # Min. :49.00 Min. :25.00 # 1st Qu.:69.25 1st Qu.:35.00 # Median :77.50 Median :41.00 # Mean :74.77 Mean :42.93 # 3rd Qu.:80.00 3rd Qu.:47.75 # Max. :92.00 Max. :72.00 cor( attitude ) # rating complaints privileges learning raises critical # rating 1.0000000 0.8254176 0.4261169 0.6236782 0.5901390 0.1564392 # complaints 0.8254176 1.0000000 0.5582882 0.5967358 0.6691975 0.1877143 # privileges 0.4261169 0.5582882 1.0000000 0.4933310 0.4454779 0.1472331 # learning 0.6236782 0.5967358 0.4933310 1.0000000 0.6403144 0.1159652 # raises 0.5901390 0.6691975 0.4454779 0.6403144 1.0000000 0.3768830 # critical 0.1564392 0.1877143 0.1472331 0.1159652 0.3768830 1.0000000 # advance 0.1550863 0.2245796 0.3432934 0.5316198 0.5741862 0.2833432 # advance # rating 0.1550863 # complaints 0.2245796 # privileges 0.3432934 # learning 0.5316198 # raises 0.5741862 # critical 0.2833432 # advance 1.0000000 qqnorm( attitude$rating, main = 'rating' ) qqnorm( attitude$complaints, main = 'complaints' ) qqnorm( attitude$privileges, main = 'privileges' ) qqnorm( attitude$learning, main = 'learning' ) qqnorm( attitude$raises, main = 'raises' ) qqnorm( attitude$critical, main = 'critical' ) summary( fm1 <- lm( rating ~ ., data = attitude ) ) # Call: # lm(formula = rating ~ ., data = attitude) # Residuals: # Min 1Q Median 3Q Max # -10.9418 -4.3555 0.3158 5.5425 11.5990 # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 10.78708 11.58926 0.931 0.361634 # complaints 0.61319 0.16098 3.809 0.000903 *** # privileges -0.07305 0.13572 -0.538 0.595594 # learning 0.32033 0.16852 1.901 0.069925 . # raises 0.08173 0.22148 0.369 0.715480 # critical 0.03838 0.14700 0.261 0.796334 # advance -0.21706 0.17821 -1.218 0.235577 # --- # Signif. codes: 0 "***" 0.001 "**" 0.01 "*" 0.05 "." 0.1 " " 1 # Residual standard error: 7.068 on 23 degrees of freedom # Multiple R-squared: 0.7326, Adjusted R-squared: 0.6628 # F-statistic: 10.5 on 6 and 23 DF, p-value: 1.24e-05 opar <- par( mfrow = c( 2, 2 ), oma = c( 0, 0, 1.1, 0 ), mar = c( 4.1, 4.1, 2.1, 1.1 ) ) plot( fm1 ) summary( fm2 <- lm( rating ~ complaints, data = attitude ) ) # Call: # lm(formula = rating ~ complaints, data = attitude) # Residuals: # Min 1Q Median 3Q Max # -12.8799 -5.9905 0.1783 6.2978 9.6294 # Coefficients: # Estimate Std. Error t value Pr(>|t|) # (Intercept) 14.37632 6.61999 2.172 0.0385 * # complaints 0.75461 0.09753 7.737 1.99e-08 *** # --- # Signif. codes: 0 "***" 0.001 "**" 0.01 "*" 0.05 "." 0.1 " " 1 # Residual standard error: 6.993 on 28 degrees of freedom # Multiple R-squared: 0.6813, Adjusted R-squared: 0.6699 # F-statistic: 59.86 on 1 and 28 DF, p-value: 1.988e-08 plot( fm2 ) par( opar ) # Bayesian model averaging over the uncertainty about which # predictor variables to include # first you install the package BMS from CRAN # (see "How to MCMC sample with R-JAGS" on the course web page # for a reminder of how to do this) library( BMS ) att <- bms( attitude, mprior = "uniform", g = "UIP", user.int = F ) coef( att ) # PIP Post Mean Post SD Cond.Pos.Sign Idx # complaints 0.9996351 0.684449094 0.13038429 1.00000000 1 # learning 0.4056392 0.096481513 0.15135419 1.00000000 3 # advance 0.2129325 -0.026686161 0.09133894 0.00000107 6 # privileges 0.1737658 -0.011854183 0.06143387 0.00046267 2 # raises 0.1665853 0.010567022 0.08355244 0.73338938 4 # critical 0.1535886 0.001034563 0.05465097 0.89769774 5 coef( att, std.coefs = T, order.by.pip = F, include.constant = T ) # PIP Post Mean Post SD Cond.Pos.Sign Idx # complaints 0.9996351 0.7486734114 0.14261872 1.00000000 1 # privileges 0.1737658 -0.0119154065 0.06175116 0.00046267 2 # learning 0.4056392 0.0930292869 0.14593855 1.00000000 3 # raises 0.1665853 0.0090258498 0.07136653 0.73338938 4 # critical 0.1535886 0.0008409819 0.04442502 0.89769774 5 # advance 0.2129325 -0.0225561446 0.07720309 0.00000107 6 # (Intercept) 1.0000000 1.2015488514 NA NA 0 summary( att ) # Mean no. regressors Draws Burnins Time # "2.1121" "64" "0" "0.02100205 secs" # No. models visited Modelspace 2^K % visited % Topmodels # "64" "64" "100" "100" # Corr PMP No. Obs. Model Prior g-Prior # "NA" "30" "uniform / 3" "UIP" # Shrinkage-Stats # "Av=0.9677" topmodels.bma( att )[, 1:6 ] # 20 28 29 24 30 21 # complaints 1.0000000 1.0000000 1.00000000 1.00000000 1.00000000 1.00000000 # privileges 0.0000000 0.0000000 0.00000000 0.00000000 1.00000000 0.00000000 # learning 0.0000000 1.0000000 1.00000000 0.00000000 0.00000000 0.00000000 # raises 0.0000000 0.0000000 0.00000000 1.00000000 0.00000000 0.00000000 # critical 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.00000000 # advance 0.0000000 0.0000000 1.00000000 0.00000000 0.00000000 1.00000000 # PMP (Exact) 0.2947721 0.1661679 0.06678871 0.05891111 0.05690948 0.05509459 # PMP (MCMC) 0.2947721 0.1661679 0.06678871 0.05891111 0.05690948 0.05509459 image( att ) plotModelsize( att ) density( att, reg = "complaints" ) predicted.att <- bms( attitude[ 1:21, ], mprior = "uniform", g = "UIP", user.int = F ) pdens <- pred.density( predicted.att, newdata = attitude[ 22:30, ] ) par( mfrow = c( 3, 3 ) ) plot( pdens, 1, main = 'observation 22' ) points( x = attitude$rating[ 22 ], y = 0, pch = 19, col = 'black' ) plot( pdens, 2, new = F, main = 'observation 23' ) points( x = attitude$rating[ 23 ], y = 0, pch = 19, col = 'black' ) plot( pdens, 3, new = F, main = 'observation 24' ) points( x = attitude$rating[ 24 ], y = 0, pch = 19, col = 'black' ) plot( pdens, 4, new = F, main = 'observation 25' ) points( x = attitude$rating[ 25 ], y = 0, pch = 19, col = 'black' ) plot( pdens, 5, new = F, main = 'observation 26' ) points( x = attitude$rating[ 26 ], y = 0, pch = 19, col = 'black' ) plot( pdens, 6, new = F, main = 'observation 27' ) points( x = attitude$rating[ 27 ], y = 0, pch = 19, col = 'black' ) plot( pdens, 7, new = F, main = 'observation 28' ) points( x = attitude$rating[ 28 ], y = 0, pch = 19, col = 'black' ) plot( pdens, 8, new = F, main = 'observation 29' ) points( x = attitude$rating[ 29 ], y = 0, pch = 19, col = 'black' ) plot( pdens, 9, new = F, main = 'observation 30' ) points( x = attitude$rating[ 30 ], y = 0, pch = 19, col = 'black' ) print( bma.log.score <- - lps.bma( pdens, attitude[ 22:30, 1 ] ) ) # -3.482696