data.source<-"http://www.math.mcgill.ca/dstephens/Regression/Data/2-1-RocketProp.csv" RocketProp<-read.csv(file=data.source) names(RocketProp)<-c('i','Strength','Age') x<-RocketProp$Age y<-RocketProp$Strength plot(x,y) plot(x,y,pch=19) plot(x,y,pch=19,xlab='Age',ylab='Shear Strength') (xmean<-mean(x)) (ymean<-mean(y)) abline(v=xmean,h=ymean,lty=2) ############################################ #Fit the simple linear regression using lm fit.RP<-lm(y~x) summary(fit.RP) coef(fit.RP) abline(coef(fit.RP),col='red') title('Line of best fit for Rocket Propulsion Data') ############################################ plot(fit.RP) ############################################ #Influence Measures lm.influence(fit.RP) influence.measures(fit.RP) plot(x,influence.measures(fit.RP)$infmat[,5]) plot(y,influence.measures(fit.RP)$infmat[,5]) ################################################## #LifeCycleSavings LifeCycleSavings str(LifeCycleSavings) fit1 <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = LifeCycleSavings) summary(fit1) par(mfrow=c(2,2)) plot(fit1) dev.print(pdf,file='InfluenceResiduals.pdf',width=8,height=8) inf.diags<-lm.influence(fit1) data.frame(hat=inf.diags$hat[c(1:7,42:50)]) data.frame(signif(inf.diags$coef[c(1:7,42:50),],4)) influence.measures(fit1)[c(1:7,42:50),] set.seed(54) n<-500 X<-rnorm(n,5,2) pdf('PPQQ-500.pdf',paper='USr',width=10,height=8) par(mfrow=c(1,2),pty='s') #P-P xvec.pp<-(c(1:n)-0.5)/n yvec.pp<-pnorm((sort(X)-mean(X))/sqrt(var(X))) plot(xvec.pp,yvec.pp,pch=19,cex=0.5,ylim=range(0,1),xlim=range(0,1), xlab='Theoretical percentiles',ylab='Sample percentiles') abline(0,1,col='red',lwd=2);title("P-P plot") #Q-Q xvec.qq<-qnorm((c(1:n)-0.5)/n) yvec.qq<-sort(X) plot(xvec.qq,yvec.qq,pch=19,cex=0.5,xlim=range(-3,3),ylim=range(0,10), xlab='Theoretical quantiles',ylab='Sample quantiles') abline(5,2,col='red',lwd=2);title("Q-Q plot") legend(-3,10,c(expression(mu+sigma*x)),lwd=2,col='red') dev.off() set.seed(54) n<-50 X<-rnorm(n,5,2) pdf('PPQQ-50.pdf',paper='USr',width=10,height=8) par(mfrow=c(1,2),pty='s') #P-P xvec.pp<-(c(1:n)-0.5)/n yvec.pp<-pnorm((sort(X)-mean(X))/sqrt(var(X))) plot(xvec.pp,yvec.pp,pch=19,cex=0.5,ylim=range(0,1),xlim=range(0,1), xlab='Theoretical percentiles',ylab='Sample percentiles') abline(0,1,col='red',lwd=2);title("P-P plot") #Q-Q xvec.qq<-qnorm((c(1:n)-0.5)/n) yvec.qq<-sort(X) plot(xvec.qq,yvec.qq,pch=19,cex=0.5,xlim=range(-3,3),ylim=range(0,10), xlab='Theoretical quantiles',ylab='Sample quantiles') abline(5,2,col='red',lwd=2);title("Q-Q plot") legend(-3,10,c(expression(mu+sigma*x)),lwd=2,col='red') dev.off() nreps<-5000 set.seed(2332) qq.vals50<-matrix(0,nrow=nreps,ncol=50) pp.vals50<-matrix(0,nrow=nreps,ncol=50) qq.vals500<-matrix(0,nrow=nreps,ncol=500) pp.vals500<-matrix(0,nrow=nreps,ncol=500) for(irep in 1:nreps){ n<-50 X<-rnorm(n,5,2) pp.vals50[irep,]<-pnorm((sort(X)-mean(X))/sqrt(var(X))) qq.vals50[irep,]<-sort(X) n<-500 X<-rnorm(n,5,2) pp.vals500[irep,]<-pnorm((sort(X)-mean(X))/sqrt(var(X))) qq.vals500[irep,]<-sort(X) } n<-50 xvec.pp.50<-(c(1:n)-0.5)/n xvec.qq.50<-qnorm((c(1:n)-0.5)/n) n<-500 xvec.pp.500<-(c(1:n)-0.5)/n xvec.qq.500<-qnorm((c(1:n)-0.5)/n) pp.ci.50<-apply(pp.vals50,2,quantile,probs=c(0.025,0.975)) qq.ci.50<-apply(qq.vals50,2,quantile,probs=c(0.025,0.975)) pp.ci.500<-apply(pp.vals500,2,quantile,probs=c(0.025,0.975)) qq.ci.500<-apply(qq.vals500,2,quantile,probs=c(0.025,0.975)) pdf('PPQQ-ci-500.pdf',paper='USr',width=10,height=8) par(mfrow=c(1,2),pty='s') plot(xvec.pp.500,pp.ci.500[1,],ylim=range(0,1),xlim=range(0,1), xlab='Theoretical percentiles',ylab='Sample percentiles',type='l',lty=2) lines(xvec.pp.500,pp.ci.500[2,],lty=2) set.seed(54) n<-500 X<-rnorm(n,5,2) xvec.pp<-(c(1:n)-0.5)/n yvec.pp<-pnorm((sort(X)-mean(X))/sqrt(var(X))) points(xvec.pp,yvec.pp,pch=19,cex=0.5) abline(0,1,col='red',lwd=2);title("P-P plot") #Q-Q xvec.qq<-qnorm((c(1:n)-0.5)/n) yvec.qq<-sort(X) plot(xvec.qq.500,qq.ci.500[1,],ylim=range(0,10),xlim=range(-3,3), xlab='Theoretical quantiles',ylab='Sample quantiles',type='l',lty=2) lines(xvec.qq.500,qq.ci.500[2,],lty=2) points(xvec.qq,yvec.qq,pch=19,cex=0.5) abline(5,2,col='red',lwd=2);title("Q-Q plot") legend(-3,10,c(expression(mu+sigma*x)),lwd=2,col='red') dev.off() pdf('PPQQ-ci-50.pdf',paper='USr',width=10,height=8) par(mfrow=c(1,2),pty='s') plot(xvec.pp.50,pp.ci.50[1,],ylim=range(0,1),xlim=range(0,1), xlab='Theoretical percentiles',ylab='Sample percentiles',type='l',lty=2) lines(xvec.pp.50,pp.ci.50[2,],lty=2) set.seed(54) n<-50 X<-rnorm(n,5,2) xvec.pp<-(c(1:n)-0.5)/n yvec.pp<-pnorm((sort(X)-mean(X))/sqrt(var(X))) points(xvec.pp,yvec.pp,pch=19,cex=0.5) abline(0,1,col='red',lwd=2);title("P-P plot") #Q-Q xvec.qq<-qnorm((c(1:n)-0.5)/n) yvec.qq<-sort(X) plot(xvec.qq.50,qq.ci.50[1,],ylim=range(0,10),xlim=range(-3,3), xlab='Theoretical quantiles',ylab='Sample quantiles',type='l',lty=2) lines(xvec.qq.50,qq.ci.50[2,],lty=2) points(xvec.qq,yvec.qq,pch=19,cex=0.5) abline(5,2,col='red',lwd=2);title("Q-Q plot") legend(-3,10,c(expression(mu+sigma*x)),lwd=2,col='red') dev.off()