set.seed(3447)
#Number of experiments
N<-10000
#Number of data
n<-25
#True parameters
beta.0<--2
beta.1<-5
#Matrix to store the estimates
beta.mat<-matrix(0,nrow=N,ncol=2)
#Set the x values: randomly generated on (0,100)
x<-sort(runif(n,0,100))
#Consider the residual errors (epsilon)
#Use a skewed distribution; Gamma(2,0.1), shifted to have mean zero
#Expectation of Gamma(2,0.1) is 20.
#Plot the Gamma(2,0.1) density, shifted by 20.
xvec<-seq(-40,40,by=0.01)
fvec<-dgamma(xvec+20,2,0.1)
plot(xvec,fvec,type='l',ylim=range(0,0.05),xlab=expression(epsilon),ylab=expression(f(epsilon)))
abline(v=0,col='red',lty=2)
#Overlay a Normal density with the same mean (0) and variance (200) for comparison
normvec<-dnorm(xvec,0,sqrt(200))
lines(xvec,normvec,lty=2,col='blue')
legend(10,0.05,c('Shifted Gamma','Normal'),col=c('black','blue'),lty=c(1,2))
title(expression(paste('Distribution of residual errors, ',epsilon)))
#Plot example data set
#Generate the residual errors
epsilon<-rgamma(n,2,0.1)-20
Y<-beta.0+beta.1*x+epsilon
plot(x,x*0,ylim=range(-50,600),type='n',ylab='y')
points(x,Y,pch=19,cex=0.9)
abline(beta.0,beta.1,col='red')
#Run all the experiments
plot(x,x*0,ylim=range(-50,600),type='n',ylab='y')
for(experiment in 1:N){
  #Generate the residual errors
  #Use a skewed distribution; Gamma(2,0.1), shifted to have mean zero
  #Expectation of Gamma(2,0.1) is 20.
  epsilon<-rgamma(n,2,0.1)-20
  Y<-beta.0+beta.1*x+epsilon
  points(x,Y,pch=19,cex=0.33)
  #Store the least squares estimates for this experiment
  beta.mat[experiment,]<-coef(lm(Y~x))
}
abline(beta.0,beta.1,col='red')
#Marginal sampling distributions of the two estimators
hist(beta.mat[,1],nclass=25,xlab=expression(hat(beta)[0]),main=expression(paste('Sampling distribution for ',hat(beta)[0])))
abline(v=beta.0,col='red',lwd=2)
hist(beta.mat[,2],nclass=25,xlab=expression(hat(beta)[1]),main=expression(paste('Sampling distribution for ',hat(beta)[1])))
abline(v=beta.1,col='red',lwd=2)
plot(beta.mat,pch=19,cex=0.75,xlab=expression(hat(beta)[0]),ylab=expression(hat(beta)[1]))
abline(v=beta.0,h=beta.1,col='red')
title('Joint Sampling Distribution')
#Empirical mean and variance of the estimates across the N experiments
apply(beta.mat,2,mean)
var(beta.mat)
#Compare with the theoretical variance matrix sigma^2 (X'X)^{-1}.
#Here sigma^2 is derived from the Gamma distribution;
#sigma^2 = 2/0.1^2 = 200.
sigsq<-200
X<-cbind(rep(1,n),x)
XtX<-t(X)%*%X
XtXinv<-solve(XtX)
(True.Variance.Matrix<-sigsq*XtXinv)
#Least squares estimators seem to have the predicted properties,
#with expectation and variance equal to the analytically computed ones.
#Exercise: what happens if n is increased to 100, or 1000 ?
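#A minimal sketch (not part of the original script) for exploring the exercise above:
#wrap the simulation in a function of n so it can be rerun for n = 100 or n = 1000.
#The function name run.experiments and its argument defaults are illustrative
#assumptions, not taken from the original code.
run.experiments<-function(n,N=10000,beta.0=-2,beta.1=5){
  #New x values on (0,100) and storage for the estimates
  x<-sort(runif(n,0,100))
  beta.mat<-matrix(0,nrow=N,ncol=2)
  for(experiment in 1:N){
    #Same shifted Gamma(2,0.1) residual errors as above
    epsilon<-rgamma(n,2,0.1)-20
    Y<-beta.0+beta.1*x+epsilon
    beta.mat[experiment,]<-coef(lm(Y~x))
  }
  #Return empirical moments alongside sigma^2 (X'X)^{-1}, with sigma^2 = 200
  X<-cbind(rep(1,n),x)
  list(estimate.means=apply(beta.mat,2,mean),
       estimate.var=var(beta.mat),
       true.var=200*solve(t(X)%*%X))
}
#Example usage: the sampling variance should shrink as n grows.
run.experiments(100)
run.experiments(1000)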