n<-100 set.seed(101) x<-runif(n,0,100) y<-2.0+1.5*x y<-2.0+1.5*x+rnorm(n)*20 plot(x,y,pch=19) fit1<-lm(y~x) summary(fit1) sig.hat<-summary(fit1)$sigma ############################################# set.seed(28347) nreps<-10000 nvec<-c(10,50,100) sig.ests<-matrix(0,nrow=nreps,ncol=length(nvec)) xmat<-matrix(0,nrow=max(nvec),length(nvec)) for(j in 1:length(nvec)){ xmat[1:nvec[j],j]<-runif(nvec[j],0,100) } for(irep in 1:nreps){ for(j in 1:length(nvec)){ x<-xmat[1:nvec[j],j] y<-2.0+1.5*x+rnorm(nvec[j])*20 fit1<-lm(y~x) sig.ests[irep,j]<-summary(fit1)$sigma^2 } } boxplot(sig.ests) abline(h=20^2,col='red') apply(sig.ests,2,mean) hist((nvec[1]-2)*sig.ests[,1]/20^2,col='gray',breaks=seq(0,40,by=0.5),ylim=range(0,750), xlab=expression(hat(sigma)^2),main='Sampling distribution') box() xv<-seq(0,1500,by=0.5) yv<-dchisq(xv,df=nvec[1]-2) lines(xv,yv*nreps*0.5,col='blue',lwd=2) ############################################# set.seed(28347) nreps<-10000 nvec<-c(10,50,100) sig.ests2<-matrix(0,nrow=nreps,ncol=length(nvec)) xmat<-matrix(0,nrow=max(nvec),length(nvec)) for(j in 1:length(nvec)){ xmat[1:nvec[j],j]<-runif(nvec[j],0,100) } evec<-rgamma(10000,2,sqrt(2/400))-2/sqrt(2/400) hist(evec) mean(evec) var(evec) n<-10000 set.seed(101) x<-runif(n,0,100) y<-2.0+1.5*x y<-2.0+1.5*x+evec plot(x,y,pch=19) fit0<-lm(y~x) plot(x,residuals(fit0)) abline(h=0,lty=2,col='red') hist(residuals(fit0)) var(residuals(fit0)) for(irep in 1:nreps){ for(j in 1:length(nvec)){ x<-xmat[1:nvec[j],j] eps<-rgamma(nvec[j],2,sqrt(2/400))-2/sqrt(2/400) y<-2.0+1.5*x+eps fit1<-lm(y~x) sig.ests2[irep,j]<-summary(fit1)$sigma^2 } } boxplot(sig.ests2) abline(h=20^2,col='red') apply(sig.ests2,2,mean) svec<-(nvec[1]-2)*sig.ests2[,1]/20^2 hist(svec[svec<40],col='gray',breaks=seq(0,40,by=0.5),ylim=range(0,750), xlab=expression(hat(sigma)^2),main='Standardized Sampling distribution') box() xv<-seq(0,1500,by=0.5) yv<-dchisq(xv,df=nvec[1]-2) lines(xv,yv*nreps*0.5,col='blue',lwd=2) svec<-(nvec[3]-2)*sig.ests2[,3]/20^2 hist(svec[svec<250],col='gray',breaks=seq(0,250,by=2),ylim=range(0,750), xlab=expression(hat(sigma)^2),main='Standardized Sampling distribution') box() xv<-seq(0,250,by=0.05) yv<-dchisq(xv,df=nvec[3]-2) lines(xv,yv*nreps*2,col='blue',lwd=2)