cereal = read.table("cereal.txt",header=T) cereal attach(cereal) # so we can use variable names names(cereal) # show the assigned names sodium sort(sodium) # ordered array stem(sodium) # stem and leaf plot stem(sodium,scale=0.5) # half as many rows stem(sodium,scale=2) # twice as many rows -- this is hard to read hist(sodium) # histogram hist(sodium,breaks=4) # histogram with fewer bins hist(sodium,breaks=6) # it ignores you here hist(sodium,breaks=c(0,60,120,180,240,300,360)) # force the number of bins par(mfrow=c(2,2)) # put four plots on one page hist(sodium,breaks=4) hist(sodium,breaks=c(0,70,140,210,280,350)) hist(sodium,breaks=c(0,60,120,180,240,300,360)) hist(sodium) hist(sodium,breaks=c(0,70,140,210,280,350)) hist(sodium) hist(sodium,breaks=12) hist(sodium,breaks=23) par(mfrow=c(1,1)) # go back to one plot per page plot(sodium,type="l") # line plot (in order they appear in the dataset) summary(sodium) # min, max, quartiles, and mean min(sodium); median(sodium); mean(sodium); max(sodium) boxplot(sodium) # potential outliers var(sodium) # variance sd(sodium) # standard deviation -- perhaps they aren't outliers hist(fiber) # skewed right summary(fiber) boxplot(fiber) sd(fiber) # these do look like outliers table(shelf) # summary table table(mfr) barplot(table(mfr)) # bar chart table(shelf,mfr) # contingency (cross-tabulation) table barplot(table(shelf,mfr),beside=T) # side-by-side bar chart # plotting the likelihood for the heart attack example # note that n! = gamma(n+1) like = function(n,y,theta) { return(gamma(n+1)/(gamma(y+1)*gamma(n+1-y))*theta^y*(1-theta)^(n-y)) } theta=seq(from=0.01,to=0.99,by=.01) theta plot(theta,like(400,72,theta)) loglike = function(n,y,theta) { return(lgamma(n+1)-lgamma(y+1)-lgamma(n+1-y)+y*log(theta)+(n-y)*log(1-theta)) } plot(theta,loglike(400,72,theta)) plot(theta,loglike(400,72,theta),type="l") loglike(400,72,theta) #exploring likelihoods for more binomial data #first generate some random data for binomials with different p's binoms = numeric(12) for(i in 1:12) binoms[i] = rbinom(1,40,i/13) binoms (1:12/13)*40 # compare empirical and theoretical means round(1:12/13*40,2) # easier to compare after rounding theta=seq(from=0.01,to=0.99,by=.01) par(mfrow=c(3,4)) # fit all 12 plots on one page for(i in 1:12) plot(theta,dbinom(binoms[i],40,theta),type="l") # use ylim to set the range of the y axis for(i in 1:12) plot(theta,dbinom(binoms[i],40,theta),ylim=c(0,.28),type="l") # fix the y axis label for(i in 1:12) plot(theta,dbinom(binoms[i],40,theta),ylim=c(0,.28),type="l",ylab="likelihood") # add a title for(i in 1:12) plot(theta,dbinom(binoms[i],40,theta),ylim=c(0,.28),type="l",ylab="likelihood",main="Binomial Likelihood") # add a more informative title for(i in 1:12) plot(theta,dbinom(binoms[i],40,theta),ylim=c(0,.28),type="l",ylab="likelihood",main=paste("Binomial(40,",round(i/13,2),")",sep="")) par(mfrow=c(1,1))