\documentclass[notitlepage]{article}
\usepackage{../Math323}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{multirow}
\usepackage{bbm}

\usepackage{amsfonts,amsmath,amssymb}

\setlength{\parindent}{0pt}

\lstloadlanguages{R}

\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}

\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}

\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\begin{document}

<<setup, cache=FALSE, echo=FALSE>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=999)
options(repos=c(CRAN="https://cloud.r-project.org/"))
inline_hook <- function (x) {
  if (is.numeric(x)) {
    # ifelse does a vectorized comparison
    # If integer, print without decimal; otherwise print two places
    res <- ifelse(x == round(x),
      sprintf("%.3f", x),
      sprintf("%.3f", x)
    )
    paste(res, collapse = ", ")
  }
}
knit_hooks$set(inline = inline_hook)
@
\begin{center}
{\textsclarge{Convergence Theorems}}
\end{center}

Suppose that $Y_1,Y_2,\ldots,Y_n,$ are independent random variables that have the same distribution, and have expectation $\mu$ and variance $\sigma^2$.

\begin{itemize}

\item \textbf{The Weak Law of Large Numbers (WLLN):}  For each $n$, let
\[
\overline{Y}_n = \frac{1}{n} \sum_{i=1}^n Y_i
\]
be the sample mean random variable.  Then $\overline{Y}_n \CiP \mu$ as $n \lra \infty$, that is, the probability distribution of $\overline{Y}_n$ becomes concentrated at $\mu$.

\medskip

\item \textbf{The Central Limit Theorem (CLT):} The random variable
\[
U_n = \frac{\sqrt{n}(\overline{Y}_n - \mu)}{\sigma}
\]
\textit{converges in distribution} to a \textit{standard Normal distribution}.  We could also write
\[
U_n = \frac{(\overline{Y}_n - \mu)}{\sigma/\sqrt{n}}
\qquad \text{or} \qquad U_n = \frac{\left(\sum\limits_{i=1}^n Y_i - n \mu \right)}{\sqrt{n} \sigma}.
\]
Thus, when $n$ is large, we can approximate the distribution of $\overline{Y}_n$, or of
\[
S_n = \sum_{i=1}^n Y_i
\]
using a Normal distribution, irrespective of the actual distribution of $Y_1,\ldots,Y_n$.



\end{itemize}

\bigskip

\textbf{EXAMPLE:  $Bernoulli(p)$}.

\smallskip
If $Y_i \sim Bernoulli(p)$, then $\E[Y_i] = p$ and $\V[Y_i] = p(1-p)$.  The WLLN suggests that
\[
\overline{Y}_n \CiP p
\]
whereas the CLT suggests that
\[
U_n = \frac{\sqrt{n}(\overline{Y}_n - p)}{\sqrt{p(1-p)}} \CiD Normal(0,1).
\]
We can check these results in simulation: we generate \texttt{M} repeated sequences of length \texttt{N}, and study the behaviour of $\overline{Y}_n$ and $U_n$.  In the simulation below, we choose $M=10$ and $N=1000$.  We first plot the trace plots of $\overline{Y}_n$ for $n=1,\ldots,N$ for 10 different replicated runs.

<<dists01,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
set.seed(1010)
M<-10
N<-1000
p<-0.3
ybar<-replicate(M,cumsum(rbinom(N,1,p))/c(1:N))
par(mar=c(4,4,1,0))
plot(1:N,ybar[,1],type='l',xlab='n',ylab=expression(bar(y)[n]),ylim=range(0,1))
for(j in 2:M){lines(1:N,ybar[,j])}
abline(h=p,col='red',lwd=2)
@
It is clear that the sample means are converging to $p=0.3$ (marked as the red line) for each of the repeated runs.

\medskip
To check the CLT, we increase $M$ to 2000, and reduce $N$ to 500.  For each replicate run, and each $n$, we compute
\[
U_n = \frac{\sqrt{n}(\overline{Y}_n - p)}{\sqrt{p(1-p)}}
\]
which the CLT predicts will converge to a standard Normal random variable as $n$ gets large.

<<dists02,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
M<-2000
N<-500
ybar<-replicate(M,cumsum(rbinom(N,1,p))/c(1:N))
Un<-(ybar-p)/sqrt(p*(1-p))
Un<-apply(Un,2,function(x){return(x*sqrt(1:N))})
@
We can check the claim of Normality by computing mean, variance and \textit{quantiles} of the sampled values; the $\alpha$ quantile of a distribution is the value $y_\alpha$ such that
\[
F(y_\alpha) = \alpha.
\]
For example, the $\alpha=0.5$ quantile is the \textit{median}; the $\alpha=0.25$ quantile is the \textit{lower quartile}; the $\alpha=0.75$ quantile is the \textit{upper quartile} etc.  We compare these summary values with the values of the standard Normal distribution for $n=100,200,300,400,500$.
<<dists03,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
Un.sub<-t(Un[100*c(1:5),])
qvec<-c(0.025,0.25,0.5,0.75,0.975)
Un.summary<-apply(Un.sub,2,function(x){return(c(mean(x),var(x),quantile(x,qvec)))})
Un.summary<-cbind(Un.summary,c(0,1,qnorm(qvec)))
colnames(Un.summary)<-c(paste("n=",seq(100,500,by=100),sep=""),"Z")
rownames(Un.summary)[1:2]<-c("mean","var")
round(Un.summary,4)
@

%<<dists04,comment='+', fig.width=7, fig.height=7, fig.align='center'>>=
%par(mar=c(5,4,3,2),mfrow=c(2,2))
%Un.sub<-t(Un[100*c(1:5),])
%for(j in 1:5){
%   uvec<-Un.sub[,j]
%    uvec<-uvec[uvec >= -3.5 & uvec <= 3.5]
%    hist(uvec,breaks=seq(-3.5,3.5,by=0.25),col='gray',ylim=range(0,250),
%            main=substitute( paste("n=",nval),list(nval = 100*j)),xlab=expression(u[n]))
%    box()
%}
%@

We can compute the pmf of $U_n$ exactly using transformation methods;  we know that 
\[
S_n = \sum_{i=1}^n Y_i \sim Binomial(n,p)
\]
-- this can be very simply proved using mgf methods -- so that
\[
p_{S_n}(s) = \dbinom{n}{s} p^s (1-p)^{n-s} \qquad s \in \{0,1,2,\ldots,n\}
\]
with $p_{S_n}(s) = 0$ for other values of $s$.  Therefore $U_n$ also has a discrete distribution, with the same probabilities, but restricted to the set of values
\[
u = \frac{\sqrt{n}(s/n - p)}{\sqrt{p(1-p)}}
\]
for $s \in \{0,1,2,\ldots,n\}$.   We compare the \textit{cumulative} probabilities associated with the distribution and its approximation; the CLT tells us that
\[
F_{U_n}(u) = P(U_n \leq u) \bumpeq \Phi (u)
\]
where $\Phi(.)$ is the standard Normal cdf.  The computation is performed for $n=20,40,60,80,100$.  In \texttt{R}, \texttt{pbinom} computes the binomial cdf
and \texttt{pnorm} computes the Normal cdf.

<<dists05,comment='+', fig.width=7, fig.height=8, fig.align='center'>>=
par(mar=c(4,4,2,2),mfrow=c(3,2))
nvec<-seq(20,100,by=20)
xv0<-seq(-2.5,2.5,by=0.5);yv0<-pnorm(xv0)
Fmat<-matrix(0,nrow=length(nvec),ncol=length(xv0))
xv<-seq(-5,5,by=0.01);yv<-pnorm(xv)
for(j in 1:length(nvec)){
    ivec<-0:nvec[j]
    uvec<-sqrt(nvec[j])*(ivec/nvec[j]-p)/sqrt(p*(1-p))
    Pvec<-pbinom(0:nvec[j],nvec[j],p)
    plot(uvec,Pvec,pch=19,cex=0.5,xlim=range(-5,5),
         main=substitute( paste("n=",nval),list(nval = nvec[j])),
         xlab=expression(u),ylab=expression(F[n](u)))
    lines(xv,yv,col='red')
    for(k in 1:length(xv0)){Fmat[j,k]<-ifelse(is.na(Pvec[max(which(uvec<=xv0[k]))]),0,
                                                    Pvec[max(which(uvec<=xv0[k]))])}
}
Fmat<-rbind(yv0,Fmat);colnames(Fmat)<-xv0;
rownames(Fmat)<-c("Z",paste("n=",seq(20,100,by=20),sep=""))
round(Fmat,3)
@
Here the red line is the plot of the standard normal cdf; clearly the approximation is quite good even for $n=40$.

\pagebreak

\textbf{EXAMPLE:  Continuous $Uniform(0,1)$}.

\smallskip
If $Y_i \sim Uniform(0,1)$, then $\E[Y_i] = 1/2$ and $\V[Y_i] = 1/12$.  The WLLN suggests that
\[
\overline{Y}_n \CiP \frac{1}{2}
\]
whereas the CLT suggests that
\[
U_n = \frac{\sqrt{n}(\overline{Y}_n - 1/2)}{\sqrt{11/144}} \CiD Normal(0,1).
\]
In the simulation below, we choose $M=10$ repeated sequences of maximal length $N=1000$.  The trace plots of $\overline{Y}_n$ for $n=1,\ldots,N$ for 10 different replicated runs.  It is clear that the sample means are converging to $1/2$ (marked as the red line) for each of the repeated runs.

<<dists11,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
set.seed(1010)
M<-10
N<-1000
ybar<-replicate(M,cumsum(runif(N))/c(1:N))
par(mar=c(4,4,1,0))
plot(1:N,ybar[,1],type='l',xlab='n',ylab=expression(bar(y)[n]),ylim=range(0,1))
for(j in 2:M){lines(1:N,ybar[,j])}
abline(h=1/2,col='red',lwd=2)
@

\medskip
To check the CLT, we increase $M$ to 2000, and reduce $N$ to 500.  For each replicate run, and each $n$, we compute
\[
U_n = \frac{\sqrt{n}(\overline{Y}_n - 1/2)}{\sqrt{11/144}}
\]
which the CLT predicts will converge to a standard Normal random variable as $n$ gets large.

<<dists12,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
M<-2000
N<-500
ybar<-replicate(M,cumsum(runif(N))/c(1:N))
Un<-(ybar-1/2)/sqrt(11/144)
Un<-apply(Un,2,function(x){return(x*sqrt(1:N))})
@
We can check the claim of Normality by computing mean, variance and \textit{quantiles} of the sampled values  We compare these summary values with the values of the standard Normal distribution for $n=100,200,300,400,500$.
<<dists13,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
Un.sub<-t(Un[100*c(1:5),])
qvec<-c(0.025,0.25,0.5,0.75,0.975)
Un.summary<-apply(Un.sub,2,function(x){return(c(mean(x),var(x),quantile(x,qvec)))})
Un.summary<-cbind(Un.summary,c(0,1,qnorm(qvec)))
colnames(Un.summary)<-c(paste("n=",seq(100,500,by=100),sep=""),"Z")
rownames(Un.summary)[1:2]<-c("mean","var")
round(Un.summary,4)
@

In the uniform case, it is not straightforward to compute the distribution of $U_n$ analytically.  We instead compare the histograms of $U_n$ across the $M$ replicates with the standard normal pdf.  The computation is performed for $n=10,20,30,40,50$.

<<dists14,comment='+', fig.width=8, fig.height=3.25, fig.align='center'>>=
par(mar=c(3,3,1,2),mfrow=c(1,2))
Un.sub<-t(Un[10*c(1:5),])
xv<-seq(-3.5,3.5,by=0.001)
yv<-dnorm(xv)
for(j in 1:5){
    uvec<-Un.sub[,j]
    uvec<-uvec[uvec >= -3.5 & uvec <= 3.5]
    hist(uvec,breaks=seq(-3.5,3.5,by=0.25),col='gray',ylim=range(0,250),cex.axis=0.75,
            main=substitute( paste("n=",nval),list(nval = 10*j)),xlab=expression(u[n]))
    lines(xv,yv*M*0.25,col='red')
    box()
}
@
Here the red line is the plot of the standard normal cdf; the approximation is quite good even for $n=10$.

\bigskip

\textbf{EXAMPLE:  $Exponential (1)$}.

\smallskip
If $Y_i \sim Exponential(1)$, then $\E[Y_i] = 1$ and $\V[Y_i] = 1$.  The WLLN suggests that
\[
\overline{Y}_n \CiP 1
\]
whereas the CLT suggests that
\[
U_n = \frac{\sqrt{n}(\overline{Y}_n - 1)}{\sqrt{1}} \CiD Normal(0,1).
\]
In the simulation below, we choose $M=10$ repeated sequences of maximal length $N=1000$.  The trace plots of $\overline{Y}_n$ for $n=1,\ldots,N$ for 10 different replicated runs.  It is clear that the sample means are converging to $1$ (marked as the red line) for each of the repeated runs.

<<dists21,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
set.seed(1010)
M<-10
N<-1000
ybar<-replicate(M,cumsum(rexp(N))/c(1:N))
par(mar=c(4,4,1,0))
plot(1:N,ybar[,1],type='l',xlab='n',ylab=expression(bar(y)[n]),ylim=range(0,2))
for(j in 2:M){lines(1:N,ybar[,j])}
abline(h=1,col='red',lwd=2)
@

\medskip
To check the CLT, we increase $M$ to 2000, and reduce $N$ to 500.  For each replicate run, and each $n$, we compute
\[
U_n = \sqrt{n}(\overline{Y}_n - 1)
\]
which the CLT predicts will converge to a standard Normal random variable as $n$ gets large.

<<dists22,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
M<-2000;N<-500
ybar<-replicate(M,cumsum(rexp(N))/c(1:N))
Un<-(ybar-1)/sqrt(1)
Un<-apply(Un,2,function(x){return(x*sqrt(1:N))})
@
We can check the claim of Normality by computing mean, variance and \textit{quantiles} of the sampled values  We compare these summary values with the values of the standard Normal distribution for $n=100,200,300,400,500$.
<<dists23,comment='+', fig.width=7.5, fig.height=5, fig.align='center'>>=
Un.sub<-t(Un[100*c(1:5),])
qvec<-c(0.025,0.25,0.5,0.75,0.975)
Un.summary<-apply(Un.sub,2,function(x){return(c(mean(x),var(x),quantile(x,qvec)))})
Un.summary<-cbind(Un.summary,c(0,1,qnorm(qvec)))
colnames(Un.summary)<-c(paste("n=",seq(100,500,by=100),sep=""),"Z")
rownames(Un.summary)[1:2]<-c("mean","var")
round(Un.summary,4)
@

In the Exponential case, we can show using mgfs that
\[
S_n = \sum_{i=1}^n Y_i \sim Gamma(n,1)
\]
and therefore we can compute the cdf of $U_n$ analytically from first principles:
\[
F_{U_n}(u) = P(U_n \leq u) = P \left(\sqrt{n}(\overline{Y}_n - 1) \leq u \right) = P(S_n \leq \sqrt{n} u + n) = F_{S_n}(\sqrt{n} u + n)
\]

<<dists24,comment='+', fig.width=8, fig.height=3.25, fig.align='center'>>=
par(mar=c(4,4,2,2),mfrow=c(1,2))
nvec<-seq(20,100,by=20)
xv0<-seq(-2.5,2.5,by=0.5);yv0<-pnorm(xv0)
Fmat<-matrix(0,nrow=length(nvec),ncol=length(xv0))
xv<-seq(-5,5,by=0.01);yv<-pnorm(xv)
for(j in 1:length(nvec)){
    ivec<-0:nvec[j]
    svec<-sqrt(nvec[j])*xv+nvec[j]
    Fvec<-pgamma(svec,nvec[j],1)
    plot(xv,Fvec,pch=19,cex=0.5,xlim=range(-5,5),type='l',
         main=substitute( paste("n=",nval),list(nval = nvec[j])),
         xlab=expression(u),ylab=expression(F[n](u)))
    lines(xv,yv,col='red')
    Fmat[j,]<-pgamma(sqrt(nvec[j])*xv0+nvec[j],nvec[j],1)
}
Fmat<-rbind(yv0,Fmat);colnames(Fmat)<-xv0;
rownames(Fmat)<-c("Z",paste("n=",seq(20,100,by=20),sep=""))
round(Fmat,3)
@
In these plots, the black (exact cdf of $U_n$) and red (standard Normal approximation to the distribution) are almost identical.

\bigskip

We also compare the histograms of $U_n$ across the $M$ replicates with the standard normal pdf.  The computation is performed for $n=10,20,30,40,50$.

<<dists25,comment='+', fig.width=8, fig.height=3.25, fig.align='center'>>=
par(mar=c(3,3,1,2),mfrow=c(1,2))
Un.sub<-t(Un[10*c(1:5),])
xv<-seq(-3.5,3.5,by=0.001)
yv<-dnorm(xv)
for(j in 1:5){
    uvec<-Un.sub[,j]
    uvec<-uvec[uvec >= -3.5 & uvec <= 3.5]
    hist(uvec,breaks=seq(-3.5,3.5,by=0.25),col='gray',ylim=range(0,250),cex.axis=0.75,
            main=substitute( paste("n=",nval),list(nval = 10*j)),xlab=expression(u[n]))
    lines(xv,yv*M*0.25,col='red');box()
}
@
Here the red line is the plot of the standard normal cdf; the approximation is good for $n=50$, but not so good for smaller $n$.
\end{document}