\documentclass[notitlepage]{article}
\usepackage{../../Math556}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{multirow}
\usepackage{bbm}
\usepackage{bbold}
\usepackage{mathtools}
\usepackage{amsfonts,amsmath,amssymb}
\usepackage{tikz}

\usepackage{forest}
\usetikzlibrary{shadows,fit,backgrounds}
\forestset{
  declare toks={level label}{},
  declare toks register={level labels},
  level labels={},
  declare count register={leveller},
  leveller'=0,
  level split/.style={
    temptoksa={#1},
    split register={temptoksa}{:}{tempcounta,level label split},
  },
  level label split/.style={
    temptoksb={#1},
    temptoksc={},
    split register={temptoksb}{,}{temptoksc, level label splitter},
    tikz+/.wrap 2 pgfmath args={
      \node (label leveller ##1) [anchor=east, align=right, font=\sffamily] at (level ##1.west -| westpoint) {##2};
    }{tempcounta}{temptoksc},
    before computing xy/.wrap pgfmath arg={
      tikz+={
        \node [anchor=north east, align=right, font=\rmfamily] at (label leveller ##1.north -| west of westpoint) {LEVEL ##1};
      },
    }{tempcounta},
  },
  level label splitter/.style={
    temptoksc+={\\#1},
  },
  label levels/.style={
    tikz+={
      \coordinate (westpoint) at ([xshift=-15pt]current bounding box.west);
    },
    before packing={
        tikz+={
        \coordinate (west of westpoint) at ([xshift=-15pt]current bounding box.west);
      },
    },
    before drawing tree={
      tikz+={
        \scoped[on background layer]{\node [left color=blue!50!cyan!25!white, right color=blue!50!cyan!25!white, middle color=blue!50!cyan, inner sep=10pt, rounded corners, draw=blue!50!cyan, draw opacity=.5, fill opacity=.15, fit=(current bounding box)] {};}
      },
    },
    delay={
      for tree breadth-first={
        if level label={}{}{
          if={(level())>(leveller)}{
            leveller/.option=level,
            alias/.wrap pgfmath arg={level ##1}{level()},
            if level labels={}{}{
              level labels+={;},
            },
            level labels+/.option=level,
            level labels+={:},
          }{},
          level labels+/.option=level label,
          level labels+={,},
        },
      },
    },
    before typesetting nodes={
      if level labels={}{}{
        split register={level labels}{;}{level split},
      },
    },
  }
}

\setlength{\parindent}{0pt}

\def\E{\Expect}


\lstloadlanguages{R}

\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}

\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}

\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\begin{document}

<<setup, cache=FALSE, echo=FALSE, warning=FALSE, messages=FALSE>>=
library(knitr)
suppressMessages(library(rmutil))

# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=999)
options(repos=c(CRAN="https://cloud.r-project.org/"))
inline_hook <- function (x) {
  if (is.numeric(x)) {
    # ifelse does a vectorized comparison
    # If integer, print without decimal; otherwise print two places
    res <- ifelse(x == round(x),
      sprintf("%.6f", x),
      sprintf("%.6f", x)
    )
    paste(res, collapse = ", ")
  }
}
knit_hooks$set(inline = inline_hook)
@

\begin{center}
\textsclarge{MATH 556: Mathematical Statistics I}

\vspace{0.1 in} \textsc{Delta Method: Examples}
\end{center}

Under the conditions of the Central Limit Theorem (CLT), for random variables $X_1,\ldots,X_n$ and their sample mean random variable $\Xbarn$
\[
\sqrt{n} (\Xbarn - \mu) \CiD X \sim Normal(0,\sigma^2).
\]

\bigskip

We illustrate results using the \textit{Inverse Gaussian distribution} where
\[
f_X(x;\mu,\beta) = \One_{(0,\infty)}(x) \sqrt{\frac{1}{2 \pi \beta x^3}} \exp\left\{ - \frac{1}{2 \beta} \frac{(x-\mu)^2}{\mu^2 x} \right\}
\]
where
\[
\E_X[X] = \mu \qquad \qquad \Var_X[X] = \beta \mu^3.
\]
so we have that $\sigma^2 = \beta \mu^3$.


<<C00,comment='+', fig.width=7, fig.height=4.75, fig.align='center',echo=FALSE>>=
x<-seq(0.01,5,by=0.01)
mu1<-2;be1<-1;sig1<-sqrt(be1*mu1^3)
fx1<-dinvgauss(x,mu1,sig1)
mu2<-4;be2<-0.5;sig2<-sqrt(be2*mu2^3)
fx2<-dinvgauss(x,mu2,sig2)
mu3<-1.5;be3<-3;sig3<-sqrt(be3*mu3^3)
fx3<-dinvgauss(x,mu3,sig3)
mu4<-3;be4<-0.05;sig4<-sqrt(be4*mu4^3)
fx4<-dinvgauss(x,mu4,sig4)
par(mfrow=c(2,2),mar=c(4,4,2,2))
plot(x,fx1,type='l',ylim=range(0,3),ylab='f(x)')
title(substitute(paste(mu == m,",",beta==b),list(m=mu1,b=be1)))
plot(x,fx2,type='l',ylim=range(0,3),ylab='f(x)')
title(substitute(paste(mu == m,",",beta==b),list(m=mu2,b=be2)))
plot(x,fx3,type='l',ylim=range(0,3),ylab='f(x)')
title(substitute(paste(mu == m,",",beta==b),list(m=mu3,b=be3)))
plot(x,fx4,type='l',ylim=range(0,3),ylab='f(x)')
title(substitute(paste(mu == m,",",beta==b),list(m=mu4,b=be4)))
@

The CLT  suggests that as $n \lra \infty$
\[
Z_n = \frac{\sqrt{n}(\Xmean_n - \mu)}{\sqrt{\beta \mu^3}} \CiD Z \sim Normal(0,1)
\]
which we explore below for $n=10$ and $n=50$.  Overlaying the histograms of $N=2000$ realizations from the sampling distribution of $Z_n$ are plots of the standard Normal density (red lines).

<<C01,comment='+', fig.width=7, fig.height=4.25, fig.align='center'>>=
set.seed(194)
n<-10;N<-2000
xv<-seq(-5,5,length=1001);yv<-dnorm(xv)
Zn<-replicate(N,sqrt(n)*(mean(rinvgauss(n,mu1,be1))-mu1)/sig1)
par(mar=c(4,4,2,1))
ind<-abs(Zn)<5
hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box()
lines(xv,yv,col='red')
@

<<C03,comment='+', fig.width=7, fig.height=4.25, fig.align='center',echo=FALSE>>=
n<-50
Zn<-replicate(N,sqrt(n)*(mean(rinvgauss(n,mu1,be1))-mu1)/sig1)
par(mar=c(4,4,2,1))
ind<-abs(Zn)<5
hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box()
lines(xv,yv,col='red')
@


\begin{enumerate}[(a)]
\item Consider $g(x) = \log x$, so that $\dot{g}(x) = 1/x$, so that
\[
L_n = \sqrt{n} (\log \Xbarn - \log \mu) \CiD X \sim Normal (0,\sigma^2/\mu^2)
\]
and
\[
\log \Xbarn  \sim \calAN (\log \mu,\sigma^2/(n\mu^2))
\]
so that
\[
Z_n = \frac{\sqrt{n}(\log \Xbarn - \log \mu)}{\sqrt{\sigma^2/\mu^2}} \CiD Z \sim Normal(0,1).
\]

<<C04,comment='+', fig.width=7, fig.height=4.0, fig.align='center'>>=
set.seed(194)
n<-10;N<-2000
xv<-seq(-5,5,length=1001);yv<-dnorm(xv)
sval<-sig1/mu1
Zn<-replicate(N,sqrt(n)*(log(mean(rinvgauss(n,mu1,be1)))-log(mu1))/sval)
par(mar=c(4,4,2,1))
hist(Zn,breaks=seq(-5,5,by=0.25),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box()
lines(xv,yv,col='red')
@

\item Consider the function $g(x) = 1/x$, so $\dot{g}(x) = -1/x^2$.  Then the Delta method gives
\[
R_n = \sqrt{n} \left(\frac{1}{\Xbarn} - \frac{1}{\mu} \right) \CiD X \sim Normal (0,\sigma^2/\mu^4)
\]
or,
\[
\frac{1}{\Xbarn} \sim \calAN \left( \frac{1}{\mu}, \frac{\sigma^2}{n \mu^4} \right)
\]
and
\[
Z_n = \frac{\sqrt{n}\left(\dfrac{1}{\Xbarn} - \dfrac{1}{\mu}\right)}{\sqrt{\sigma^2/\mu^4}} \CiD Z \sim Normal(0,1).
\]
Here the distribution of $R_n$ is more skewed, and a larger $n$ is required before the Normal approximation is deemed adequate.

<<C05,comment='+', fig.width=7, fig.height=4, fig.align='center'>>=
set.seed(194);n<-10;N<-2000
xv<-seq(-5,5,length=1001);yv<-dnorm(xv)
Zn<-replicate(N,sqrt(n)*(1/(mean(rinvgauss(n,mu1,be1)))-1/mu1)/(sig1/mu1^2))
par(mar=c(4,4,2,1))
ind<-abs(Zn)<5
hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box()
lines(xv,yv,col='red')
@

<<C06,comment='+', fig.width=7, fig.height=4.25, fig.align='center',echo=FALSE>>=
n<-100
xv<-seq(-5,5,length=1001);yv<-dnorm(xv)
Zn<-replicate(N,sqrt(n)*(1/(mean(rinvgauss(n,mu1,be1)))-1/mu1)/(sval<-sig1/mu1^2))
par(mar=c(4,4,2,1))
ind<-abs(Zn)<5
hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box()
lines(xv,yv,col='red')
@

\item If $X_1,\ldots,X_n$ are iid $Poisson(\lambda)$, then we have
\[
M_n = \sqrt{n} (\Xmean_n - \lambda) \CiD X \sim Normal(0,\lambda)
\]
so that
\[
Z_n = \frac{\sqrt{n}(\Xmean_n - \lambda)}{\sqrt{\lambda}} \CiD Z \sim Normal(0,1).
\]


\end{enumerate}

<<C07,comment='+', fig.width=7, fig.height=4.0, fig.align='center',echo=TRUE>>=
n<-100
lam<-3
xv<-seq(-5,5,length=1001);yv<-dnorm(xv)
Zn<-replicate(N,sqrt(n)*(mean(rpois(n,lam))-lam)/sqrt(lam))
par(mar=c(4,4,2,1))
ind<-abs(Zn)<5
hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box()
lines(xv,yv,col='red')
@

Consider the case $g(x) = x^2 - 2x$ so that $\dot{g}(x) = 2(x -1)$.  If $\lambda \neq 1$, we then have if
\[
g(\Xmean_n) = \Xmean_n^2 - 2 \Xmean_n
\]
then we have that
\[
Z_n = \sqrt{n}\frac{(g(\Xmean_n) - g(\lambda))}{\sqrt{4 \lambda (\lambda-1)^2}}  \CiD Z \sim Normal(0,1)
\]
<<C08,comment='+', fig.width=7, fig.height=4.25, fig.align='center',echo=TRUE>>=
Xbarn<-sqrt(lam)*Zn/sqrt(n)+lam
gXn<-Xbarn^2 - 2*Xbarn
mu<-lam^2-2*lam
sigsq<-4*lam*(lam-1)^2
Zn<-sqrt(n)*(gXn-mu)/sqrt(sigsq)
par(mar=c(4,4,2,1))
ind<-abs(Zn)<5
hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box()
lines(xv,yv,col='red')
@
However, if $\lambda = 1$ this approximation does not work as $\dot{g}(\lambda) = 0$.  In this case we need to use the second order Delta method that uses the \textit{second order} Taylor expansion at $\lambda$
\[
g(X_n) = g(\lambda) + \dot{g}(\lambda)(X_n - \lambda) + \frac{\ddot{g}(X^\star)}{2} (X_n - \mu)^2
\]
so that as  $\dot{g}(\lambda)=0$ when $\lambda=1$, we have by the Taylor expansion for $|X_n - X^\star| < |X_n - \lambda|$ that
\[
n(g(X_n) - g(\lambda)) = \frac{\ddot{g}(X^\star)}{2} \{\sqrt{n}(X_n - \lambda)\}^2 \CiD  \frac{\ddot{g}(\lambda)}{2} \lambda Z^2
\]
where $Z \sim Normal(0,1)$.  Here $\ddot{g}(\lambda) = 2$.  Thus, by the usual transformation result
\[
V_n = n(g(X_n) - g(\lambda)) \CiD V \sim Gamma \left(\frac{1}{2},\frac{1}{\lambda} \right)
\]
or equivalently
\[
W_n = \frac{n(g(X_n) - g(\lambda))}{\lambda} \CiD W \sim Gamma \left(\frac{1}{2},\frac{1}{2} \right) \equiv \chi_1^2.
\]
<<C09,comment='+', fig.width=7, fig.height=4.0, fig.align='center',echo=TRUE>>=
n<-100;N<-5000;lam<-1
Xbarn<-replicate(N,mean(rpois(n,lam)))
gXn<-Xbarn^2 - 2*Xbarn
mu<-lam^2-2*lam
Wn<-n*(gXn-mu)/lam;ind<-Wn<10
xv<-seq(0,10,length=1001);yv<-dchisq(xv,1)
par(mar=c(3,4,1,1))
hist(Wn[ind],breaks=seq(0,10,by=0.1),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),xlab=expression(W[n]));box()
lines(xv,yv,col='red')
n<-1000
Xbarn<-replicate(N,mean(rpois(n,lam)))
par(mar=c(3,4,1,1))
gXn<-Xbarn^2 - 2*Xbarn
Wn<-n*(gXn-mu)/lam;ind<-Wn<10
hist(Wn[ind],breaks=seq(0,10,by=0.1),freq=FALSE,
        main=substitute(n==nv,list(nv=n)),xlab=expression(W[n]));box()
lines(xv,yv,col='red')
@


\end{document}