oring=read.table("o-ring.txt") oring names(oring)=c("flight","temp","damage") attach(oring) plot(temp[damage>0],damage[damage>0]) # plot of damaged o-rings vs. temp # does there look like a relationship between temperature and damage? plot(temp,damage) # plot of all data # does this plot look different? # how do you feel about predicting for a temp of 31? oring.lm=lm(damage~temp) summary(oring.lm) Call: lm(formula = damage ~ temp) Residuals: Min 1Q Median 3Q Max -2.1965 -1.5965 -0.5166 0.8034 5.6837 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 18.03561 4.58831 3.931 0.000767 *** temp -0.23999 0.06601 -3.635 0.001548 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 2.147 on 21 degrees of freedom Multiple R-Squared: 0.3863, Adjusted R-squared: 0.357 F-statistic: 13.22 on 1 and 21 DF, p-value: 0.001548 # add fitted line to scatterplot lines(temp,fitted(oring.lm)) # 95% confidence interval for the slope -0.23999 - 0.06601*qt(.975,21) -0.23999 + 0.06601*qt(.975,21) # predicted damage for temp of 31: coef(oring.lm) coef(oring.lm)[1] + coef(oring.lm)[2]*31 # should we launch? #prediction interval predict(oring.lm,data.frame(temp=31),interval="predict") # check assumptions plot(fitted(oring.lm),resid(oring.lm)) qqnorm(resid(oring.lm)) # R-Squared demonstration par(ask=T) for(r in c(1,-1,.9, .8, .5, .3, .1, .05, 0, -.4, -.4, -.4, -.4, -.8)) { x=rnorm(200); y=r*x+sqrt(1-r^2)*rnorm(200) plot(x, y,xlab="X",ylab="Y",main=paste("R-Sq = ",cor(x,y)^2,", corr = ",cor(x,y),sep="")) } par(ask=F) # correlation is not causation example storks=read.table("storks.txt",header=T) attach(storks) plot(storks$storks,people,xlab="storks") storks.lm=lm(people~storks$storks) lines(storks$storks,fitted(storks.lm)) summary(storks.lm) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 36.54715 4.57751 7.984 0.000498 *** storks$storks 0.14910 0.02285 6.525 0.001265 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 2.809 on 5 degrees of freedom Multiple R-Squared: 0.8949, Adjusted R-squared: 0.8739 F-statistic: 42.57 on 1 and 5 DF, p-value: 0.001265 # check assumptions plot(fitted(storks.lm),resid(storks.lm)) qqnorm(resid(storks.lm)) # example with outlier olymp=read.table("olympics.txt",header=T) pairs(olymp) attach(olymp) plot(year,long_jump) long.lm=lm(long_jump~year) lines(year,fitted(long.lm)) summary(long.lm) plot(fitted(long.lm),resid(long.lm)) plot(year,resid(long.lm)) # plot looks the same plot(long_jump,resid(long.lm)) # bad plot qqnorm(resid(long.lm)) fitted(long.lm) # fitted values predict(long.lm,data.frame(year=84),interval="predict") # prediction interval