\documentclass[notitlepage]{article}
\usepackage{../../Math556}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{multirow}
\usepackage{bbm}
\usepackage{bbold}
\usepackage{mathtools}
\usepackage{amsfonts,amsmath,amssymb}
\usepackage{tikz}

\usepackage{forest}
\usetikzlibrary{shadows,fit,backgrounds}
\forestset{
  declare toks={level label}{},
  declare toks register={level labels},
  level labels={},
  declare count register={leveller},
  leveller'=0,
  level split/.style={
    temptoksa={#1},
    split register={temptoksa}{:}{tempcounta,level label split},
  },
  level label split/.style={
    temptoksb={#1},
    temptoksc={},
    split register={temptoksb}{,}{temptoksc, level label splitter},
    tikz+/.wrap 2 pgfmath args={
      \node (label leveller ##1) [anchor=east, align=right, font=\sffamily] at (level ##1.west -| westpoint) {##2};
    }{tempcounta}{temptoksc},
    before computing xy/.wrap pgfmath arg={
      tikz+={
        \node [anchor=north east, align=right, font=\rmfamily] at (label leveller ##1.north -| west of westpoint) {LEVEL ##1};
      },
    }{tempcounta},
  },
  level label splitter/.style={
    temptoksc+={\\#1},
  },
  label levels/.style={
    tikz+={
      \coordinate (westpoint) at ([xshift=-15pt]current bounding box.west);
    },
    before packing={
        tikz+={
        \coordinate (west of westpoint) at ([xshift=-15pt]current bounding box.west);
      },
    },
    before drawing tree={
      tikz+={
        \scoped[on background layer]{\node [left color=blue!50!cyan!25!white, right color=blue!50!cyan!25!white, middle color=blue!50!cyan, inner sep=10pt, rounded corners, draw=blue!50!cyan, draw opacity=.5, fill opacity=.15, fit=(current bounding box)] {};}
      },
    },
    delay={
      for tree breadth-first={
        if level label={}{}{
          if={(level())>(leveller)}{
            leveller/.option=level,
            alias/.wrap pgfmath arg={level ##1}{level()},
            if level labels={}{}{
              level labels+={;},
            },
            level labels+/.option=level,
            level labels+={:},
          }{},
          level labels+/.option=level label,
          level labels+={,},
        },
      },
    },
    before typesetting nodes={
      if level labels={}{}{
        split register={level labels}{;}{level split},
      },
    },
  }
}

\setlength{\parindent}{0pt}

\def\E{\Expect}


\lstloadlanguages{R}

\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}

\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}

\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\begin{document}

<<setup, cache=FALSE, echo=FALSE>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=999)
options(repos=c(CRAN="https://cloud.r-project.org/"))
inline_hook <- function (x) {
  if (is.numeric(x)) {
    # ifelse does a vectorized comparison
    # If integer, print without decimal; otherwise print two places
    res <- ifelse(x == round(x),
      sprintf("%.6f", x),
      sprintf("%.6f", x)
    )
    paste(res, collapse = ", ")
  }
}
knit_hooks$set(inline = inline_hook)
@

\begin{center}
\textsclarge{MATH 556: Mathematical Statistics I}

\vspace{0.1 in} \textsc{Hierarchical Models: Variance Components}
\end{center}

Consider the three-level hierarchical model:
\begin{itemize}
\item[] \text{LEVEL 3}: $\tau^2,\sigma^2 > 0$, fixed parameters;

\medskip

\item[] \text{LEVEL 2}: $M_1,\ldots,M_L \sim Normal(0,\tau^2)$ independent;

\medskip

\item[] \textrm{LEVEL 1}:  For $l=1,\ldots,L$
\[
X_{l1},\ldots,X_{l n_l}| M_l = m_l \sim Normal(m_l,\sigma^2)
\]
where all the $X_{lj}$ are conditionally independent given $M_1,\ldots,M_L$.
\end{itemize}
\begin{figure}[!h]
\centering
\begin{forest}
  for tree={
    math content,
  },
  delay={
    where content={}{
      if level=0{}{
        content=\ldots,
        math content,
        no edge,
        fit=band,
      },
    }{
      rounded corners,
      outer color=blue!20,
      inner color=blue!15,
      minimum height=1cm,
      minimum width=1cm,
      draw,
      drop shadow,
    },
  },
  label levels,
  [, phantom
    [$\tau^2${, }$\sigma^2$, plain content, level label= Fixed
      [m_1, level label= $M_1{,}\ldots{,}M_L$
        [x_{11},level label= $\{X_{lj}{,}l{=}1{,}\ldots{,}L{,}j{=}1{,}\ldots{,}n_l\}$]
        []
        [x_{1n_1}],
      ]
      [m_2,
        [x_{21}]
        []
        [x_{2n_1}]
      ]
      []
      [m_L
        [x_{L1}]
        []
        [x_{Ln_L}]
      ]
    ]
  ]
\end{forest}
\end{figure}

\medskip
In the following plot, we have $L=5$, with $n_l = 1000$ for $l=1,\ldots,L$, with $\tau^2 = 2^2$ and $\sigma^2 = 0.1^2$.

<<H01,comment='+', fig.width=6, fig.height=4, fig.align='center'>>=
set.seed(23984)
L<-5
nvec<-rep(1000,L)
tau<-2; sig<-0.1
M<-rnorm(L,0,tau)
mvec<-rep(M,nvec)
X<-rnorm(sum(nvec),mvec,sig)
par(mar=c(3,3,2,1))
hist(X,breaks=seq(-4,4,by=0.1),main='');box()
points(M,rep(0,L),pch=19,col='red',cex=0.5)
@
In the histogram,
\begin{itemize}
  \item the red dots indicate the position of the sampled $m_1,\ldots,m_5$;
  \item the histograms represent the sampled $X_{lj}$ for $l=1,\ldots,5$ and $j = 1,\ldots,1000$.
\end{itemize}

We can implement the same model with the variables having bivariate Normal distributions: for example
\[
\bM_l = \begin{bmatrix} M_{l1}  \\ M_{l2} \end{bmatrix} \sim Normal_2(\zerovec,\bV)
\]
for $l=1,\ldots,L$, independently, with
\[
\bV = \begin{bmatrix} 2 & 3 \\ 3 & 4 \end{bmatrix}
\]
and, for $j=1,\ldots,n_l$
\[
\bX_{lj}|\bM_l = \bm_l \sim Normal_2(\bm_l,\Sigma)
\]
conditionally independent, with the factorization of $\Sigma$ as
\[
\Sigma = \begin{bmatrix*}[r] 0.50 & 0.00 \\ 0.00 & 0.60 \end{bmatrix*}
\begin{bmatrix*}[r] 1.0 & -0.8 \\ -0.8 & 1.0 \end{bmatrix*} \begin{bmatrix*}[r] 0.50 & 0.00 \\ 0.00 & 0.60 \end{bmatrix*} = \begin{bmatrix*}[r] 0.25 & -0.24 \\ -0.24 & 0.36 \end{bmatrix*}.
\]
In the following plot, we have $L=10$, with $n_l = 100$ for $l=1,\ldots,L$
<<H02,comment='+', fig.width=4.5, fig.height=4.5, fig.align='center'>>=
set.seed(23984)
library(mvnfast)
L<-10
nvec<-rep(100,L)
V<-matrix(c(4,3,3,4),2,2)
Sigma<-diag(c(0.5,0.60)) %*% matrix(c(1,-0.8,-0.8,1),2,2) %*% diag(c(0.5,0.60))
M<-rmvn(L,rep(0,2),V)
X<-numeric(length=2)
for(l in 1:L){
    Z<-rmvn(nvec[l],M[l,],Sigma)
    X<-rbind(X,Z)
}
par(mar=c(3,4,1,1))
plot(X,pch=19,cex=0.4,xlim=range(-6,6),ylim=range(-6,6),
        xlab=expression(X[lj1]),ylab=expression(X[lj2]))
points(M,pch=19,col='red',cex=0.5)
@

For each $l$, the $X_{lj}$ values for $j=1,\ldots,n_l$ are conditionally independent given $M_l = m_l$, but are not (unconditionally) independent.  For the univariate case, the marginal distribution of $\bX_{l} = (X_{l1},\ldots,X_{l n_l})^\top$ is computed as
\begin{align*}
f_{X_{l1},\ldots,X_{l n_l}}(x_{l1},\ldots,x_{l n_l};\tau^2,\sigma^2) & = \int_{-\infty}^\infty \prod_{j=1}^{n_l} f_{X_{lj}|M_l}(x_{lj}|m_l;\sigma^2) f_{M_l}(m_l;\tau^2) \ d m_l \\[6pt]
& = \int_{-\infty}^\infty \prod_{j=1}^{n_l} \left\{ \left(\dfrac{1}{2 \pi \sigma^2 }\right)^{1/2} \exp\left\{-\dfrac{1}{2 \sigma^2} (x_{lj}-m_l)^2 \right\} \right\} \left(\dfrac{1}{2 \pi \tau^2}\right)^{1/2} \exp\left\{-\dfrac{1}{2 \tau^2} m_l^2 \right\} dm_l \\[6pt]
& = \left(\dfrac{1}{2 \pi}\right)^{(n_l+1)/2}\left(\frac{1}{\sigma^2}\right)^{n_l/2} \left(\frac{1}{\tau^2}\right)^{1/2} \int_{-\infty}^\infty \exp\left\{-\dfrac{1}{2} \left[ \frac{1}{\sigma^2} \sum_{j=1}^n (x_{lj} - m_l)^2 + \frac{1}{\tau^2} m_l^2 \right] \right\}d m_l.
\end{align*}
Completing the square gives
\[
\frac{1}{\sigma^2} \sum_{j=1}^n (x_{lj} - m_l)^2 + \frac{1}{\tau^2} m_l^2 = \frac{1}{\sigma^2} \sum_{j=1}^{n_l} (x_{lj} - \xbar_l)^2 + \left(\frac{n_l}{\sigma^2}+\frac{1}{\tau^2} \right) \left(m_l - \frac{n_l \xbar/\sigma^2}{n/\sigma^2+1/\tau^2} \right)^2 + \frac{n/\sigma^2}{n/\sigma^2+1/\tau^2} \xbar_l^2
\]
and thus integrating out $m_l$ yields
\begin{align*}
f_{X_{l1},\ldots,X_{l n_l}}(x_{l1},\ldots,x_{l n_l};\tau^2,\sigma^2) & = \left(\dfrac{1}{2 \pi}\right)^{n_l/2}\left(\frac{1}{\sigma^2}\right)^{n_l/2} \left(\frac{1}{\tau^2}\right)^{1/2} \left(\frac{n_l}{\sigma^2}+\frac{1}{\tau^2} \right)^{-1/2} \\[6pt]
& \qquad \qquad \qquad \exp\left\{-\dfrac{1}{2} \left[ \frac{1}{\sigma^2} \sum_{j=1}^{n_l} (x_{lj} - \xbar_l)^2  + \frac{n/\sigma^2}{n/\sigma^2+1/\tau^2} \xbar_l^2 \right] \right\}
\end{align*}
This joint pdf does not factorize into a product of functions of the individual $x_{lj}$ values, and hence the random variables are not indepenent.  We can compute the distribution more concisely using mgfs and iterated expectation.  We have for the multivariate mgf
\begin{align*}
M_{\bX_l}(\bt) = \E_\bX \left[\exp\{ \bt^\top \bX \}\right] & = \E_{M_l} \left[ \E_{\bX|M_l} \left[\exp\{ \bt^\top \bX \} \bigg| M_l\right] \right] & \text{by iterated expectation}\\[6pt]
& = \E_{M_l} \left[ \E_{\bX|M_l} \left[\exp\left\{ \sum_{j=1}^{n_l} t_j X_{lj} \right\} \bigg| M_l\right] \right] & \text{expanding the inner product} \\[6pt]
& = \E_{M_l} \left[ \prod_{j=1}^{n_l} \exp\left\{M_l t_j + \frac{t_j^2 \sigma^2}{2} \right\} \right] & \text{using the Normal mgf for $X_{lj}$} \\[6pt]
& = \exp \left\{\frac{(\bt^\top \bt) \sigma^2}{2} \right\} \E_{M_l} \left[ \exp\left\{M_l (\onevec^\top \bt) \right\} \right] \\[6pt]
& = \exp \left\{\frac{(\bt^\top \bt) \sigma^2}{2} \right\} \exp \left\{ \frac{(\onevec^\top \bt)^2 \tau^2}{2} \right\} & \text{using the Normal mgf for $M_{l}$}\\[6pt]
& = \exp \left\{\frac{\bt^\top \bV \bt}{2} \right\} & (\onevec^\top \bt)^2 = \bt^\top (\onevec \onevec^\top) \bt
\end{align*}
where, by inspection, we have that
\[
\bV = \sigma^2 \Ident_{n_l} + \tau^2 \onevec \onevec^\top
\]
where, for the $(j,k)$th element, we have
\[
[\bV]_{jk} = \left\{
\begin{array}{cc}
  \sigma^2 + \tau^2 & j = k \\
  \tau^2 &  j \neq k
\end{array}
\right.
\]
Thus we can conclude that $\bX_l \sim Normal_{n_l}(\zerovec,\bV)$.

\bigskip
We can verify this by direct calculation: we have that
\[
\E_{X_{lj}}[X_{lj}] = \E_{M_l}[\E_{X_{lj}|M_l}[X_{lj}|M_l]] = \E_{M_l}[M_l] = 0.
\]
and
\[
\E_{X_{lj}}[X_{lj}^2] = \E_{M_l}[\E_{X_{lj}|M_l}[X_{lj}^2|M_l]] = \E_{M_l}[M_l^2 + \tau^2] = \sigma^2 + \tau^2.
\]
so $\Var_{X_{lj}}[X_{lj}] = \sigma^2 + \tau^2$.  Finally, for $j \neq k$,
\[
\E_{X_{lj},X_{lk}}[X_{lj} X_{lk} ] = \E_{M_l}[\E_{X_{lj},X_{lk}|M_l}[X_{lj} X_{lk}|M_l ]] = \E_{M_l}[M_l^2] = \tau^2
\]
as $X_{lj}$ and $X_{lk}$ are conditionally independent given $M_l$, each with mean $M_l$.  We therefore conclude that
\[
\Corr_{X_{lj},X_{lk}}[X_{lj},X_{lk} ] = \frac{\Cov_{X_{lj},X_{lk}}[X_{lj},X_{lk} ]}{\Var_{X_{lj}}[X_{lj}]} = \frac{\tau^2}{\sigma^2 + \tau^2}.
\]

\pagebreak
Note that

\begin{itemize}

\item this correlation is always positive;

\item if $\tau^2 \lra 0$, the correlation converges to zero, and we have reverted to the iid $Normal(0,\sigma^2)$ case;


\item as $\sigma^2 \lra 0$, the correlation converges to one.


\item In this model, $l_1 \neq l_2$, the $X_{l_1 j_1}$ and $X_{l_2 j_2}$ are \textbf{unconditionally independent} for all $j_1$ and $j_2$.

\end{itemize}

In the following simulation, we have $L=2$ and $n_1 = n_2 =3$, with $\tau=2$ and $\sigma=1$, yielding a within-cluster correlation of
\[
\frac{\tau^2}{\sigma^2 + \tau^2} = 0.8.
\]

<<H03,comment='+', fig.width=5, fig.height=5, fig.align='center'>>=
tau<-2;sig<-1
X<-t(replicate(1000,c(rnorm(3,rnorm(1,0,tau),sig),c(rnorm(3,rnorm(1,0,tau),sig)))))
par(mar=c(3,3,0,1))
pairs(X,cex=0.5,pch=19,labels=c(expression(X[11]),expression(X[12]),expression(X[13]),
                        expression(X[21]),expression(X[22]),expression(X[23])))
round(cor(X),4)
@

\end{document}