\documentclass[notitlepage]{article} \usepackage{../../Math556} \usepackage{listings} \usepackage{numprint} \usepackage{enumerate} \usepackage{multirow} \usepackage{bbm} \usepackage{bbold} \usepackage{mathtools} \usepackage{amsfonts,amsmath,amssymb} \usepackage{tikz} \usepackage{forest} \usetikzlibrary{shadows,fit,backgrounds} \forestset{ declare toks={level label}{}, declare toks register={level labels}, level labels={}, declare count register={leveller}, leveller'=0, level split/.style={ temptoksa={#1}, split register={temptoksa}{:}{tempcounta,level label split}, }, level label split/.style={ temptoksb={#1}, temptoksc={}, split register={temptoksb}{,}{temptoksc, level label splitter}, tikz+/.wrap 2 pgfmath args={ \node (label leveller ##1) [anchor=east, align=right, font=\sffamily] at (level ##1.west -| westpoint) {##2}; }{tempcounta}{temptoksc}, before computing xy/.wrap pgfmath arg={ tikz+={ \node [anchor=north east, align=right, font=\rmfamily] at (label leveller ##1.north -| west of westpoint) {LEVEL ##1}; }, }{tempcounta}, }, level label splitter/.style={ temptoksc+={\\#1}, }, label levels/.style={ tikz+={ \coordinate (westpoint) at ([xshift=-15pt]current bounding box.west); }, before packing={ tikz+={ \coordinate (west of westpoint) at ([xshift=-15pt]current bounding box.west); }, }, before drawing tree={ tikz+={ \scoped[on background layer]{\node [left color=blue!50!cyan!25!white, right color=blue!50!cyan!25!white, middle color=blue!50!cyan, inner sep=10pt, rounded corners, draw=blue!50!cyan, draw opacity=.5, fill opacity=.15, fit=(current bounding box)] {};} }, }, delay={ for tree breadth-first={ if level label={}{}{ if={(level())>(leveller)}{ leveller/.option=level, alias/.wrap pgfmath arg={level ##1}{level()}, if level labels={}{}{ level labels+={;}, }, level labels+/.option=level, level labels+={:}, }{}, level labels+/.option=level label, level labels+={,}, }, }, }, before typesetting nodes={ if level labels={}{}{ split register={level labels}{;}{level split}, }, }, } } \setlength{\parindent}{0pt} \def\E{\Expect} \lstloadlanguages{R} \definecolor{keywordcolor}{rgb}{0,0.6,0.6} \definecolor{delimcolor}{rgb}{0.461,0.039,0.102} \definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432} \lstdefinestyle{Rsettings}{ basicstyle=\ttfamily, breaklines=true, showstringspaces=false, keywords={if, else, function, theFunction, tmp}, % Write as many keywords otherkeywords={}, commentstyle=\itshape\color{Rcommentcolor}, keywordstyle=\color{keywordcolor}, moredelim=[s][\color{delimcolor}]{"}{"}, } \lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}} \begin{document} <>= library(knitr) # global chunk options opts_chunk$set(cache=TRUE, autodep=TRUE) options(scipen=999) options(repos=c(CRAN="https://cloud.r-project.org/")) inline_hook <- function (x) { if (is.numeric(x)) { # ifelse does a vectorized comparison # If integer, print without decimal; otherwise print two places res <- ifelse(x == round(x), sprintf("%.6f", x), sprintf("%.6f", x) ) paste(res, collapse = ", ") } } knit_hooks$set(inline = inline_hook) @ \begin{center} \textsclarge{MATH 556: Mathematical Statistics I} \vspace{0.1 in} \textsc{Hierarchical Models: Variance Components} \end{center} Consider the three-level hierarchical model: \begin{itemize} \item[] \text{LEVEL 3}: $\tau^2,\sigma^2 > 0$, fixed parameters; \medskip \item[] \text{LEVEL 2}: $M_1,\ldots,M_L \sim Normal(0,\tau^2)$ independent; \medskip \item[] \textrm{LEVEL 1}: For $l=1,\ldots,L$ \[ X_{l1},\ldots,X_{l n_l}| M_l = m_l \sim Normal(m_l,\sigma^2) \] where all the $X_{lj}$ are conditionally independent given $M_1,\ldots,M_L$. \end{itemize} \begin{figure}[!h] \centering \begin{forest} for tree={ math content, }, delay={ where content={}{ if level=0{}{ content=\ldots, math content, no edge, fit=band, }, }{ rounded corners, outer color=blue!20, inner color=blue!15, minimum height=1cm, minimum width=1cm, draw, drop shadow, }, }, label levels, [, phantom [$\tau^2${, }$\sigma^2$, plain content, level label= Fixed [m_1, level label= $M_1{,}\ldots{,}M_L$ [x_{11},level label= $\{X_{lj}{,}l{=}1{,}\ldots{,}L{,}j{=}1{,}\ldots{,}n_l\}$] [] [x_{1n_1}], ] [m_2, [x_{21}] [] [x_{2n_1}] ] [] [m_L [x_{L1}] [] [x_{Ln_L}] ] ] ] \end{forest} \end{figure} \medskip In the following plot, we have $L=5$, with $n_l = 1000$ for $l=1,\ldots,L$, with $\tau^2 = 2^2$ and $\sigma^2 = 0.1^2$. <>= set.seed(23984) L<-5 nvec<-rep(1000,L) tau<-2; sig<-0.1 M<-rnorm(L,0,tau) mvec<-rep(M,nvec) X<-rnorm(sum(nvec),mvec,sig) par(mar=c(3,3,2,1)) hist(X,breaks=seq(-4,4,by=0.1),main='');box() points(M,rep(0,L),pch=19,col='red',cex=0.5) @ In the histogram, \begin{itemize} \item the red dots indicate the position of the sampled $m_1,\ldots,m_5$; \item the histograms represent the sampled $X_{lj}$ for $l=1,\ldots,5$ and $j = 1,\ldots,1000$. \end{itemize} We can implement the same model with the variables having bivariate Normal distributions: for example \[ \bM_l = \begin{bmatrix} M_{l1} \\ M_{l2} \end{bmatrix} \sim Normal_2(\zerovec,\bV) \] for $l=1,\ldots,L$, independently, with \[ \bV = \begin{bmatrix} 2 & 3 \\ 3 & 4 \end{bmatrix} \] and, for $j=1,\ldots,n_l$ \[ \bX_{lj}|\bM_l = \bm_l \sim Normal_2(\bm_l,\Sigma) \] conditionally independent, with the factorization of $\Sigma$ as \[ \Sigma = \begin{bmatrix*}[r] 0.50 & 0.00 \\ 0.00 & 0.60 \end{bmatrix*} \begin{bmatrix*}[r] 1.0 & -0.8 \\ -0.8 & 1.0 \end{bmatrix*} \begin{bmatrix*}[r] 0.50 & 0.00 \\ 0.00 & 0.60 \end{bmatrix*} = \begin{bmatrix*}[r] 0.25 & -0.24 \\ -0.24 & 0.36 \end{bmatrix*}. \] In the following plot, we have $L=10$, with $n_l = 100$ for $l=1,\ldots,L$ <>= set.seed(23984) library(mvnfast) L<-10 nvec<-rep(100,L) V<-matrix(c(4,3,3,4),2,2) Sigma<-diag(c(0.5,0.60)) %*% matrix(c(1,-0.8,-0.8,1),2,2) %*% diag(c(0.5,0.60)) M<-rmvn(L,rep(0,2),V) X<-numeric(length=2) for(l in 1:L){ Z<-rmvn(nvec[l],M[l,],Sigma) X<-rbind(X,Z) } par(mar=c(3,4,1,1)) plot(X,pch=19,cex=0.4,xlim=range(-6,6),ylim=range(-6,6), xlab=expression(X[lj1]),ylab=expression(X[lj2])) points(M,pch=19,col='red',cex=0.5) @ For each $l$, the $X_{lj}$ values for $j=1,\ldots,n_l$ are conditionally independent given $M_l = m_l$, but are not (unconditionally) independent. For the univariate case, the marginal distribution of $\bX_{l} = (X_{l1},\ldots,X_{l n_l})^\top$ is computed as \begin{align*} f_{X_{l1},\ldots,X_{l n_l}}(x_{l1},\ldots,x_{l n_l};\tau^2,\sigma^2) & = \int_{-\infty}^\infty \prod_{j=1}^{n_l} f_{X_{lj}|M_l}(x_{lj}|m_l;\sigma^2) f_{M_l}(m_l;\tau^2) \ d m_l \\[6pt] & = \int_{-\infty}^\infty \prod_{j=1}^{n_l} \left\{ \left(\dfrac{1}{2 \pi \sigma^2 }\right)^{1/2} \exp\left\{-\dfrac{1}{2 \sigma^2} (x_{lj}-m_l)^2 \right\} \right\} \left(\dfrac{1}{2 \pi \tau^2}\right)^{1/2} \exp\left\{-\dfrac{1}{2 \tau^2} m_l^2 \right\} dm_l \\[6pt] & = \left(\dfrac{1}{2 \pi}\right)^{(n_l+1)/2}\left(\frac{1}{\sigma^2}\right)^{n_l/2} \left(\frac{1}{\tau^2}\right)^{1/2} \int_{-\infty}^\infty \exp\left\{-\dfrac{1}{2} \left[ \frac{1}{\sigma^2} \sum_{j=1}^n (x_{lj} - m_l)^2 + \frac{1}{\tau^2} m_l^2 \right] \right\}d m_l. \end{align*} Completing the square gives \[ \frac{1}{\sigma^2} \sum_{j=1}^n (x_{lj} - m_l)^2 + \frac{1}{\tau^2} m_l^2 = \frac{1}{\sigma^2} \sum_{j=1}^{n_l} (x_{lj} - \xbar_l)^2 + \left(\frac{n_l}{\sigma^2}+\frac{1}{\tau^2} \right) \left(m_l - \frac{n_l \xbar/\sigma^2}{n/\sigma^2+1/\tau^2} \right)^2 + \frac{n/\sigma^2}{n/\sigma^2+1/\tau^2} \xbar_l^2 \] and thus integrating out $m_l$ yields \begin{align*} f_{X_{l1},\ldots,X_{l n_l}}(x_{l1},\ldots,x_{l n_l};\tau^2,\sigma^2) & = \left(\dfrac{1}{2 \pi}\right)^{n_l/2}\left(\frac{1}{\sigma^2}\right)^{n_l/2} \left(\frac{1}{\tau^2}\right)^{1/2} \left(\frac{n_l}{\sigma^2}+\frac{1}{\tau^2} \right)^{-1/2} \\[6pt] & \qquad \qquad \qquad \exp\left\{-\dfrac{1}{2} \left[ \frac{1}{\sigma^2} \sum_{j=1}^{n_l} (x_{lj} - \xbar_l)^2 + \frac{n/\sigma^2}{n/\sigma^2+1/\tau^2} \xbar_l^2 \right] \right\} \end{align*} This joint pdf does not factorize into a product of functions of the individual $x_{lj}$ values, and hence the random variables are not indepenent. We can compute the distribution more concisely using mgfs and iterated expectation. We have for the multivariate mgf \begin{align*} M_{\bX_l}(\bt) = \E_\bX \left[\exp\{ \bt^\top \bX \}\right] & = \E_{M_l} \left[ \E_{\bX|M_l} \left[\exp\{ \bt^\top \bX \} \bigg| M_l\right] \right] & \text{by iterated expectation}\\[6pt] & = \E_{M_l} \left[ \E_{\bX|M_l} \left[\exp\left\{ \sum_{j=1}^{n_l} t_j X_{lj} \right\} \bigg| M_l\right] \right] & \text{expanding the inner product} \\[6pt] & = \E_{M_l} \left[ \prod_{j=1}^{n_l} \exp\left\{M_l t_j + \frac{t_j^2 \sigma^2}{2} \right\} \right] & \text{using the Normal mgf for $X_{lj}$} \\[6pt] & = \exp \left\{\frac{(\bt^\top \bt) \sigma^2}{2} \right\} \E_{M_l} \left[ \exp\left\{M_l (\onevec^\top \bt) \right\} \right] \\[6pt] & = \exp \left\{\frac{(\bt^\top \bt) \sigma^2}{2} \right\} \exp \left\{ \frac{(\onevec^\top \bt)^2 \tau^2}{2} \right\} & \text{using the Normal mgf for $M_{l}$}\\[6pt] & = \exp \left\{\frac{\bt^\top \bV \bt}{2} \right\} & (\onevec^\top \bt)^2 = \bt^\top (\onevec \onevec^\top) \bt \end{align*} where, by inspection, we have that \[ \bV = \sigma^2 \Ident_{n_l} + \tau^2 \onevec \onevec^\top \] where, for the $(j,k)$th element, we have \[ [\bV]_{jk} = \left\{ \begin{array}{cc} \sigma^2 + \tau^2 & j = k \\ \tau^2 & j \neq k \end{array} \right. \] Thus we can conclude that $\bX_l \sim Normal_{n_l}(\zerovec,\bV)$. \bigskip We can verify this by direct calculation: we have that \[ \E_{X_{lj}}[X_{lj}] = \E_{M_l}[\E_{X_{lj}|M_l}[X_{lj}|M_l]] = \E_{M_l}[M_l] = 0. \] and \[ \E_{X_{lj}}[X_{lj}^2] = \E_{M_l}[\E_{X_{lj}|M_l}[X_{lj}^2|M_l]] = \E_{M_l}[M_l^2 + \tau^2] = \sigma^2 + \tau^2. \] so $\Var_{X_{lj}}[X_{lj}] = \sigma^2 + \tau^2$. Finally, for $j \neq k$, \[ \E_{X_{lj},X_{lk}}[X_{lj} X_{lk} ] = \E_{M_l}[\E_{X_{lj},X_{lk}|M_l}[X_{lj} X_{lk}|M_l ]] = \E_{M_l}[M_l^2] = \tau^2 \] as $X_{lj}$ and $X_{lk}$ are conditionally independent given $M_l$, each with mean $M_l$. We therefore conclude that \[ \Corr_{X_{lj},X_{lk}}[X_{lj},X_{lk} ] = \frac{\Cov_{X_{lj},X_{lk}}[X_{lj},X_{lk} ]}{\Var_{X_{lj}}[X_{lj}]} = \frac{\tau^2}{\sigma^2 + \tau^2}. \] \pagebreak Note that \begin{itemize} \item this correlation is always positive; \item if $\tau^2 \lra 0$, the correlation converges to zero, and we have reverted to the iid $Normal(0,\sigma^2)$ case; \item as $\sigma^2 \lra 0$, the correlation converges to one. \item In this model, $l_1 \neq l_2$, the $X_{l_1 j_1}$ and $X_{l_2 j_2}$ are \textbf{unconditionally independent} for all $j_1$ and $j_2$. \end{itemize} In the following simulation, we have $L=2$ and $n_1 = n_2 =3$, with $\tau=2$ and $\sigma=1$, yielding a within-cluster correlation of \[ \frac{\tau^2}{\sigma^2 + \tau^2} = 0.8. \] <>= tau<-2;sig<-1 X<-t(replicate(1000,c(rnorm(3,rnorm(1,0,tau),sig),c(rnorm(3,rnorm(1,0,tau),sig))))) par(mar=c(3,3,0,1)) pairs(X,cex=0.5,pch=19,labels=c(expression(X[11]),expression(X[12]),expression(X[13]), expression(X[21]),expression(X[22]),expression(X[23]))) round(cor(X),4) @ \end{document}