\documentclass[notitlepage]{article} \usepackage{../../Math556} \usepackage{listings} \usepackage{numprint} \usepackage{enumerate} \usepackage{multirow} \usepackage{bbm} \usepackage{bbold} \usepackage{mathtools} \usepackage{amsfonts,amsmath,amssymb} \usepackage{tikz} \usepackage{forest} \usetikzlibrary{shadows,fit,backgrounds} \forestset{ declare toks={level label}{}, declare toks register={level labels}, level labels={}, declare count register={leveller}, leveller'=0, level split/.style={ temptoksa={#1}, split register={temptoksa}{:}{tempcounta,level label split}, }, level label split/.style={ temptoksb={#1}, temptoksc={}, split register={temptoksb}{,}{temptoksc, level label splitter}, tikz+/.wrap 2 pgfmath args={ \node (label leveller ##1) [anchor=east, align=right, font=\sffamily] at (level ##1.west -| westpoint) {##2}; }{tempcounta}{temptoksc}, before computing xy/.wrap pgfmath arg={ tikz+={ \node [anchor=north east, align=right, font=\rmfamily] at (label leveller ##1.north -| west of westpoint) {LEVEL ##1}; }, }{tempcounta}, }, level label splitter/.style={ temptoksc+={\\#1}, }, label levels/.style={ tikz+={ \coordinate (westpoint) at ([xshift=-15pt]current bounding box.west); }, before packing={ tikz+={ \coordinate (west of westpoint) at ([xshift=-15pt]current bounding box.west); }, }, before drawing tree={ tikz+={ \scoped[on background layer]{\node [left color=blue!50!cyan!25!white, right color=blue!50!cyan!25!white, middle color=blue!50!cyan, inner sep=10pt, rounded corners, draw=blue!50!cyan, draw opacity=.5, fill opacity=.15, fit=(current bounding box)] {};} }, }, delay={ for tree breadth-first={ if level label={}{}{ if={(level())>(leveller)}{ leveller/.option=level, alias/.wrap pgfmath arg={level ##1}{level()}, if level labels={}{}{ level labels+={;}, }, level labels+/.option=level, level labels+={:}, }{}, level labels+/.option=level label, level labels+={,}, }, }, }, before typesetting nodes={ if level labels={}{}{ split register={level labels}{;}{level split}, }, }, } } \setlength{\parindent}{0pt} \def\E{\Expect} \lstloadlanguages{R} \definecolor{keywordcolor}{rgb}{0,0.6,0.6} \definecolor{delimcolor}{rgb}{0.461,0.039,0.102} \definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432} \lstdefinestyle{Rsettings}{ basicstyle=\ttfamily, breaklines=true, showstringspaces=false, keywords={if, else, function, theFunction, tmp}, % Write as many keywords otherkeywords={}, commentstyle=\itshape\color{Rcommentcolor}, keywordstyle=\color{keywordcolor}, moredelim=[s][\color{delimcolor}]{"}{"}, } \lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}} \begin{document} <>= library(knitr) suppressMessages(library(rmutil)) # global chunk options opts_chunk$set(cache=TRUE, autodep=TRUE) options(scipen=999) options(repos=c(CRAN="https://cloud.r-project.org/")) inline_hook <- function (x) { if (is.numeric(x)) { # ifelse does a vectorized comparison # If integer, print without decimal; otherwise print two places res <- ifelse(x == round(x), sprintf("%.6f", x), sprintf("%.6f", x) ) paste(res, collapse = ", ") } } knit_hooks$set(inline = inline_hook) @ \begin{center} \textsclarge{MATH 556: Mathematical Statistics I} \vspace{0.1 in} \textsc{Delta Method: Examples} \end{center} Under the conditions of the Central Limit Theorem (CLT), for random variables $X_1,\ldots,X_n$ and their sample mean random variable $\Xbarn$ \[ \sqrt{n} (\Xbarn - \mu) \CiD X \sim Normal(0,\sigma^2). \] \bigskip We illustrate results using the \textit{Inverse Gaussian distribution} where \[ f_X(x;\mu,\beta) = \One_{(0,\infty)}(x) \sqrt{\frac{1}{2 \pi \beta x^3}} \exp\left\{ - \frac{1}{2 \beta} \frac{(x-\mu)^2}{\mu^2 x} \right\} \] where \[ \E_X[X] = \mu \qquad \qquad \Var_X[X] = \beta \mu^3. \] so we have that $\sigma^2 = \beta \mu^3$. <>= x<-seq(0.01,5,by=0.01) mu1<-2;be1<-1;sig1<-sqrt(be1*mu1^3) fx1<-dinvgauss(x,mu1,sig1) mu2<-4;be2<-0.5;sig2<-sqrt(be2*mu2^3) fx2<-dinvgauss(x,mu2,sig2) mu3<-1.5;be3<-3;sig3<-sqrt(be3*mu3^3) fx3<-dinvgauss(x,mu3,sig3) mu4<-3;be4<-0.05;sig4<-sqrt(be4*mu4^3) fx4<-dinvgauss(x,mu4,sig4) par(mfrow=c(2,2),mar=c(4,4,2,2)) plot(x,fx1,type='l',ylim=range(0,3),ylab='f(x)') title(substitute(paste(mu == m,",",beta==b),list(m=mu1,b=be1))) plot(x,fx2,type='l',ylim=range(0,3),ylab='f(x)') title(substitute(paste(mu == m,",",beta==b),list(m=mu2,b=be2))) plot(x,fx3,type='l',ylim=range(0,3),ylab='f(x)') title(substitute(paste(mu == m,",",beta==b),list(m=mu3,b=be3))) plot(x,fx4,type='l',ylim=range(0,3),ylab='f(x)') title(substitute(paste(mu == m,",",beta==b),list(m=mu4,b=be4))) @ The CLT suggests that as $n \lra \infty$ \[ Z_n = \frac{\sqrt{n}(\Xmean_n - \mu)}{\sqrt{\beta \mu^3}} \CiD Z \sim Normal(0,1) \] which we explore below for $n=10$ and $n=50$. Overlaying the histograms of $N=2000$ realizations from the sampling distribution of $Z_n$ are plots of the standard Normal density (red lines). <>= set.seed(194) n<-10;N<-2000 xv<-seq(-5,5,length=1001);yv<-dnorm(xv) Zn<-replicate(N,sqrt(n)*(mean(rinvgauss(n,mu1,be1))-mu1)/sig1) par(mar=c(4,4,2,1)) ind<-abs(Zn)<5 hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE, main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box() lines(xv,yv,col='red') @ <>= n<-50 Zn<-replicate(N,sqrt(n)*(mean(rinvgauss(n,mu1,be1))-mu1)/sig1) par(mar=c(4,4,2,1)) ind<-abs(Zn)<5 hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE, main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box() lines(xv,yv,col='red') @ \begin{enumerate}[(a)] \item Consider $g(x) = \log x$, so that $\dot{g}(x) = 1/x$, so that \[ L_n = \sqrt{n} (\log \Xbarn - \log \mu) \CiD X \sim Normal (0,\sigma^2/\mu^2) \] and \[ \log \Xbarn \sim \calAN (\log \mu,\sigma^2/(n\mu^2)) \] so that \[ Z_n = \frac{\sqrt{n}(\log \Xbarn - \log \mu)}{\sqrt{\sigma^2/\mu^2}} \CiD Z \sim Normal(0,1). \] <>= set.seed(194) n<-10;N<-2000 xv<-seq(-5,5,length=1001);yv<-dnorm(xv) sval<-sig1/mu1 Zn<-replicate(N,sqrt(n)*(log(mean(rinvgauss(n,mu1,be1)))-log(mu1))/sval) par(mar=c(4,4,2,1)) hist(Zn,breaks=seq(-5,5,by=0.25),freq=FALSE, main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box() lines(xv,yv,col='red') @ \item Consider the function $g(x) = 1/x$, so $\dot{g}(x) = -1/x^2$. Then the Delta method gives \[ R_n = \sqrt{n} \left(\frac{1}{\Xbarn} - \frac{1}{\mu} \right) \CiD X \sim Normal (0,\sigma^2/\mu^4) \] or, \[ \frac{1}{\Xbarn} \sim \calAN \left( \frac{1}{\mu}, \frac{\sigma^2}{n \mu^4} \right) \] and \[ Z_n = \frac{\sqrt{n}\left(\dfrac{1}{\Xbarn} - \dfrac{1}{\mu}\right)}{\sqrt{\sigma^2/\mu^4}} \CiD Z \sim Normal(0,1). \] Here the distribution of $R_n$ is more skewed, and a larger $n$ is required before the Normal approximation is deemed adequate. <>= set.seed(194);n<-10;N<-2000 xv<-seq(-5,5,length=1001);yv<-dnorm(xv) Zn<-replicate(N,sqrt(n)*(1/(mean(rinvgauss(n,mu1,be1)))-1/mu1)/(sig1/mu1^2)) par(mar=c(4,4,2,1)) ind<-abs(Zn)<5 hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE, main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box() lines(xv,yv,col='red') @ <>= n<-100 xv<-seq(-5,5,length=1001);yv<-dnorm(xv) Zn<-replicate(N,sqrt(n)*(1/(mean(rinvgauss(n,mu1,be1)))-1/mu1)/(sval<-sig1/mu1^2)) par(mar=c(4,4,2,1)) ind<-abs(Zn)<5 hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE, main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box() lines(xv,yv,col='red') @ \item If $X_1,\ldots,X_n$ are iid $Poisson(\lambda)$, then we have \[ M_n = \sqrt{n} (\Xmean_n - \lambda) \CiD X \sim Normal(0,\lambda) \] so that \[ Z_n = \frac{\sqrt{n}(\Xmean_n - \lambda)}{\sqrt{\lambda}} \CiD Z \sim Normal(0,1). \] \end{enumerate} <>= n<-100 lam<-3 xv<-seq(-5,5,length=1001);yv<-dnorm(xv) Zn<-replicate(N,sqrt(n)*(mean(rpois(n,lam))-lam)/sqrt(lam)) par(mar=c(4,4,2,1)) ind<-abs(Zn)<5 hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE, main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box() lines(xv,yv,col='red') @ Consider the case $g(x) = x^2 - 2x$ so that $\dot{g}(x) = 2(x -1)$. If $\lambda \neq 1$, we then have if \[ g(\Xmean_n) = \Xmean_n^2 - 2 \Xmean_n \] then we have that \[ Z_n = \sqrt{n}\frac{(g(\Xmean_n) - g(\lambda))}{\sqrt{4 \lambda (\lambda-1)^2}} \CiD Z \sim Normal(0,1) \] <>= Xbarn<-sqrt(lam)*Zn/sqrt(n)+lam gXn<-Xbarn^2 - 2*Xbarn mu<-lam^2-2*lam sigsq<-4*lam*(lam-1)^2 Zn<-sqrt(n)*(gXn-mu)/sqrt(sigsq) par(mar=c(4,4,2,1)) ind<-abs(Zn)<5 hist(Zn[ind],breaks=seq(-5,5,by=0.25),freq=FALSE, main=substitute(n==nv,list(nv=n)),ylim=range(0,0.5),xlab=expression(Z[n]));box() lines(xv,yv,col='red') @ However, if $\lambda = 1$ this approximation does not work as $\dot{g}(\lambda) = 0$. In this case we need to use the second order Delta method that uses the \textit{second order} Taylor expansion at $\lambda$ \[ g(X_n) = g(\lambda) + \dot{g}(\lambda)(X_n - \lambda) + \frac{\ddot{g}(X^\star)}{2} (X_n - \mu)^2 \] so that as $\dot{g}(\lambda)=0$ when $\lambda=1$, we have by the Taylor expansion for $|X_n - X^\star| < |X_n - \lambda|$ that \[ n(g(X_n) - g(\lambda)) = \frac{\ddot{g}(X^\star)}{2} \{\sqrt{n}(X_n - \lambda)\}^2 \CiD \frac{\ddot{g}(\lambda)}{2} \lambda Z^2 \] where $Z \sim Normal(0,1)$. Here $\ddot{g}(\lambda) = 2$. Thus, by the usual transformation result \[ V_n = n(g(X_n) - g(\lambda)) \CiD V \sim Gamma \left(\frac{1}{2},\frac{1}{\lambda} \right) \] or equivalently \[ W_n = \frac{n(g(X_n) - g(\lambda))}{\lambda} \CiD W \sim Gamma \left(\frac{1}{2},\frac{1}{2} \right) \equiv \chi_1^2. \] <>= n<-100;N<-5000;lam<-1 Xbarn<-replicate(N,mean(rpois(n,lam))) gXn<-Xbarn^2 - 2*Xbarn mu<-lam^2-2*lam Wn<-n*(gXn-mu)/lam;ind<-Wn<10 xv<-seq(0,10,length=1001);yv<-dchisq(xv,1) par(mar=c(3,4,1,1)) hist(Wn[ind],breaks=seq(0,10,by=0.1),freq=FALSE, main=substitute(n==nv,list(nv=n)),xlab=expression(W[n]));box() lines(xv,yv,col='red') n<-1000 Xbarn<-replicate(N,mean(rpois(n,lam))) par(mar=c(3,4,1,1)) gXn<-Xbarn^2 - 2*Xbarn Wn<-n*(gXn-mu)/lam;ind<-Wn<10 hist(Wn[ind],breaks=seq(0,10,by=0.1),freq=FALSE, main=substitute(n==nv,list(nv=n)),xlab=expression(W[n]));box() lines(xv,yv,col='red') @ \end{document}