# multiple linear regression example: Bayesian model averaging
# example with the CRAN package BMS

data( attitude )

help( attitude )

# From a survey of the clerical employees of a large financial 
# organization, the data are aggregated from the questionnaires 
# of the approximately 35 employees for each of 30 
# (randomly selected) departments. The numbers give the 
# percent proportion of favourable responses to seven questions 
# in each department.

# Format

# A dataframe with 30 observations on 7 variables. 
# The first column contains the short names from the reference, 
# the second one the variable names in the data frame:

# Y    rating     numeric Overall rating
# X[1] complaints numeric Handling of employee complaints
# X[2] privileges numeric Does not allow special privileges
# X[3] learning   numeric Opportunity to learn
# X[4] raises     numeric Raises based on performance
# X[5] critical   numeric Too critical
# X[6] advance    numeric Advancement

attitude

#    rating complaints privileges learning raises critical advance
# 1      43         51         30       39     61       92      45
# 2      63         64         51       54     63       73      47
# 3      71         70         68       69     76       86      48
# 4      61         63         45       47     54       84      35
# 5      81         78         56       66     71       83      47
# 6      43         55         49       44     54       49      34
# 7      58         67         42       56     66       68      35
# 8      71         75         50       55     70       66      41
# 9      72         82         72       67     71       83      31
# 10     67         61         45       47     62       80      41
# 11     64         53         53       58     58       67      34
# 12     67         60         47       39     59       74      41
# 13     69         62         57       42     55       63      25
# 14     68         83         83       45     59       77      35
# 15     77         77         54       72     79       77      46
# 16     81         90         50       72     60       54      36
# 17     74         85         64       69     79       79      63
# 18     65         60         65       75     55       80      60
# 19     65         70         46       57     75       85      46
# 20     50         58         68       54     64       78      52
# 21     50         40         33       34     43       64      33
# 22     64         61         52       62     66       80      41
# 23     53         66         52       50     63       80      37
# 24     40         37         42       58     50       57      49
# 25     63         54         42       48     66       75      33
# 26     66         77         66       63     88       76      72
# 27     78         75         58       74     80       78      49
# 28     48         57         44       45     51       83      38
# 29     85         85         71       71     77       74      55
# 30     82         82         39       59     64       78      39

require( stats )

require( graphics )

pairs( attitude, main = "attitude data" )

summary( attitude )

#      rating        complaints     privileges       learning         raises     
#  Min.   :40.00   Min.   :37.0   Min.   :30.00   Min.   :34.00   Min.   :43.00  
#  1st Qu.:58.75   1st Qu.:58.5   1st Qu.:45.00   1st Qu.:47.00   1st Qu.:58.25  
#  Median :65.50   Median :65.0   Median :51.50   Median :56.50   Median :63.50  
#  Mean   :64.63   Mean   :66.6   Mean   :53.13   Mean   :56.37   Mean   :64.63  
#  3rd Qu.:71.75   3rd Qu.:77.0   3rd Qu.:62.50   3rd Qu.:66.75   3rd Qu.:71.00  
#  Max.   :85.00   Max.   :90.0   Max.   :83.00   Max.   :75.00   Max.   :88.00  
#     critical         advance     
#  Min.   :49.00   Min.   :25.00  
#  1st Qu.:69.25   1st Qu.:35.00  
#  Median :77.50   Median :41.00  
#  Mean   :74.77   Mean   :42.93  
#  3rd Qu.:80.00   3rd Qu.:47.75  
#  Max.   :92.00   Max.   :72.00  

cor( attitude )

#               rating complaints privileges  learning    raises  critical
# rating     1.0000000  0.8254176  0.4261169 0.6236782 0.5901390 0.1564392
# complaints 0.8254176  1.0000000  0.5582882 0.5967358 0.6691975 0.1877143
# privileges 0.4261169  0.5582882  1.0000000 0.4933310 0.4454779 0.1472331
# learning   0.6236782  0.5967358  0.4933310 1.0000000 0.6403144 0.1159652
# raises     0.5901390  0.6691975  0.4454779 0.6403144 1.0000000 0.3768830
# critical   0.1564392  0.1877143  0.1472331 0.1159652 0.3768830 1.0000000
# advance    0.1550863  0.2245796  0.3432934 0.5316198 0.5741862 0.2833432
#              advance
# rating     0.1550863
# complaints 0.2245796
# privileges 0.3432934
# learning   0.5316198
# raises     0.5741862
# critical   0.2833432
# advance    1.0000000

qqnorm( attitude$rating, main = 'rating' )

qqnorm( attitude$complaints, main = 'complaints' )

qqnorm( attitude$privileges, main = 'privileges' )

qqnorm( attitude$learning, main = 'learning' )

qqnorm( attitude$raises, main = 'raises' )

qqnorm( attitude$critical, main = 'critical' )

summary( fm1 <- lm( rating ~ ., data = attitude ) )

# Call:
# lm(formula = rating ~ ., data = attitude)

# Residuals:
#      Min       1Q   Median       3Q      Max 
# -10.9418  -4.3555   0.3158   5.5425  11.5990 

# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 10.78708   11.58926   0.931 0.361634    
# complaints   0.61319    0.16098   3.809 0.000903 ***
# privileges  -0.07305    0.13572  -0.538 0.595594    
# learning     0.32033    0.16852   1.901 0.069925 .  
# raises       0.08173    0.22148   0.369 0.715480    
# critical     0.03838    0.14700   0.261 0.796334    
# advance     -0.21706    0.17821  -1.218 0.235577    
# ---
# Signif. codes:  0 "***" 0.001 "**" 0.01 "*" 0.05 "." 0.1 " " 1 

# Residual standard error: 7.068 on 23 degrees of freedom
# Multiple R-squared: 0.7326,     Adjusted R-squared: 0.6628 
# F-statistic:  10.5 on 6 and 23 DF,  p-value: 1.24e-05 

opar <- par( mfrow = c( 2, 2 ), oma = c( 0, 0, 1.1, 0 ),
  mar = c( 4.1, 4.1, 2.1, 1.1 ) )

plot( fm1 )

summary( fm2 <- lm( rating ~ complaints, data = attitude ) )

# Call:
# lm(formula = rating ~ complaints, data = attitude)

# Residuals:
#      Min       1Q   Median       3Q      Max 
# -12.8799  -5.9905   0.1783   6.2978   9.6294 

# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 14.37632    6.61999   2.172   0.0385 *  
# complaints   0.75461    0.09753   7.737 1.99e-08 ***
# ---
# Signif. codes:  0 "***" 0.001 "**" 0.01 "*" 0.05 "." 0.1 " " 1 

# Residual standard error: 6.993 on 28 degrees of freedom
# Multiple R-squared: 0.6813,     Adjusted R-squared: 0.6699 
# F-statistic: 59.86 on 1 and 28 DF,  p-value: 1.988e-08 

plot( fm2 )

par( opar )

# Bayesian model averaging over the uncertainty about which
# predictor variables to include

# first you install the package BMS from CRAN
# (see "How to MCMC sample with R-JAGS" on the course web page
# for a reminder of how to do this)

library( BMS )

att <- bms( attitude, mprior = "uniform", g = "UIP", user.int = F )

coef( att )

#                  PIP    Post Mean    Post SD Cond.Pos.Sign Idx
# complaints 0.9996351  0.684449094 0.13038429    1.00000000   1
# learning   0.4056392  0.096481513 0.15135419    1.00000000   3
# advance    0.2129325 -0.026686161 0.09133894    0.00000107   6
# privileges 0.1737658 -0.011854183 0.06143387    0.00046267   2
# raises     0.1665853  0.010567022 0.08355244    0.73338938   4
# critical   0.1535886  0.001034563 0.05465097    0.89769774   5

coef( att, std.coefs = T, order.by.pip = F, include.constant = T )

#                   PIP     Post Mean    Post SD Cond.Pos.Sign Idx
# complaints  0.9996351  0.7486734114 0.14261872    1.00000000   1
# privileges  0.1737658 -0.0119154065 0.06175116    0.00046267   2
# learning    0.4056392  0.0930292869 0.14593855    1.00000000   3
# raises      0.1665853  0.0090258498 0.07136653    0.73338938   4
# critical    0.1535886  0.0008409819 0.04442502    0.89769774   5
# advance     0.2129325 -0.0225561446 0.07720309    0.00000107   6
# (Intercept) 1.0000000  1.2015488514         NA            NA   0

summary( att )

# Mean no. regressors               Draws             Burnins                Time 
#            "2.1121"                "64"                 "0"   "0.02100205 secs" 
#  No. models visited      Modelspace 2^K           % visited         % Topmodels 
#                "64"                "64"               "100"               "100" 
#            Corr PMP            No. Obs.         Model Prior             g-Prior 
#                "NA"                "30"       "uniform / 3"               "UIP" 
#     Shrinkage-Stats 
#         "Av=0.9677" 

topmodels.bma( att )[, 1:6 ]

#                    20        28         29         24         30         21
# complaints  1.0000000 1.0000000 1.00000000 1.00000000 1.00000000 1.00000000
# privileges  0.0000000 0.0000000 0.00000000 0.00000000 1.00000000 0.00000000
# learning    0.0000000 1.0000000 1.00000000 0.00000000 0.00000000 0.00000000
# raises      0.0000000 0.0000000 0.00000000 1.00000000 0.00000000 0.00000000
# critical    0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.00000000
# advance     0.0000000 0.0000000 1.00000000 0.00000000 0.00000000 1.00000000
# PMP (Exact) 0.2947721 0.1661679 0.06678871 0.05891111 0.05690948 0.05509459
# PMP (MCMC)  0.2947721 0.1661679 0.06678871 0.05891111 0.05690948 0.05509459

image( att )

plotModelsize( att )

density( att, reg = "complaints" )

predicted.att <- bms( attitude[ 1:21, ], mprior = "uniform", g = "UIP", 
  user.int = F )

pdens <- pred.density( predicted.att, newdata = attitude[ 22:30, ] )

par( mfrow = c( 3, 3 ) )

plot( pdens, 1, main = 'observation 22' )

points( x = attitude$rating[ 22 ], y = 0, pch = 19, col = 'black' )

plot( pdens, 2, new = F, main = 'observation 23' )

points( x = attitude$rating[ 23 ], y = 0, pch = 19, col = 'black' )

plot( pdens, 3, new = F, main = 'observation 24' )

points( x = attitude$rating[ 24 ], y = 0, pch = 19, col = 'black' )

plot( pdens, 4, new = F, main = 'observation 25' )

points( x = attitude$rating[ 25 ], y = 0, pch = 19, col = 'black' )

plot( pdens, 5, new = F, main = 'observation 26' )

points( x = attitude$rating[ 26 ], y = 0, pch = 19, col = 'black' )

plot( pdens, 6, new = F, main = 'observation 27' )

points( x = attitude$rating[ 27 ], y = 0, pch = 19, col = 'black' )

plot( pdens, 7, new = F, main = 'observation 28' )

points( x = attitude$rating[ 28 ], y = 0, pch = 19, col = 'black' )

plot( pdens, 8, new = F, main = 'observation 29' )

points( x = attitude$rating[ 29 ], y = 0, pch = 19, col = 'black' )

plot( pdens, 9, new = F, main = 'observation 30' )

points( x = attitude$rating[ 30 ], y = 0, pch = 19, col = 'black' )

print( bma.log.score <- - lps.bma( pdens, attitude[ 22:30, 1 ] ) )

# -3.482696