\documentclass[notitlepage]{article}
\usepackage{../../Math556}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{multirow}
\usepackage{bbm}
\usepackage{bbold}
\usepackage{amsfonts,amsmath,amssymb}
\setlength{\parindent}{0pt}
% Shorthand for the expectation operator; \Expect is not defined in this file
% (presumably supplied by the Math556 package -- confirm).
\def\E{\Expect}

% listings configuration used to typeset R code.
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

% Title of this handout, typeset below the course title.
\def\htitle{\textsclarge{Basic Exchangeability Constructions}}

\begin{document}

% NOTE(review): the chunk header had been garbled; any chunk label/options that
% it originally carried (e.g. include=FALSE) are lost and should be restored.
<<>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=999)
options(repos=c(CRAN="https://cloud.r-project.org/"))

# Inline output hook: formats the value of inline R expressions.
#   x : the evaluated inline expression.
# Numeric values are printed with six decimal places; vectors are collapsed
# into a comma-separated string. Non-numeric values are passed through as
# text so that they are not silently dropped from the document.
inline_hook <- function (x) {
  if (is.numeric(x)) {
    # sprintf is vectorized, so every element is formatted identically
    res <- sprintf("%.6f", x)
    paste(res, collapse = ", ")
  } else {
    paste(as.character(x), collapse = ", ")
  }
}
knit_hooks$set(inline = inline_hook)
@

\begin{center}\coursetitle\end{center}
\begin{center}\htitle\end{center}

An infinite sequence of random variables $X_1,X_2,\ldots,X_n,\ldots$ is \textit{exchangeable} (or \textit{infinitely exchangeable}) if, for any $n \geq 1$ and sets $A_1,A_2,\ldots,A_n \subseteq \R$ we have that
\[
P_{X_1,\ldots,X_n} \left[ \bigcap_{j=1}^n (X_j \in A_j)\right] = P_{X_{\tau(1)},\ldots,X_{\tau(n)}} \left[ \bigcap_{j=1}^n (X_{\tau(j)} \in A_j)\right]
\]
for all permutations $(\tau(1),\ldots,\tau(n))$ of the labels $(1,\ldots,n)$.
In terms of cdfs, we can express this as follows: for all $(x_1,\ldots,x_n) \in \mathbb{R}^n$
\[
F_{X_1,\ldots,X_n}(x_1,\ldots,x_n) = F_{X_{\tau(1)},\ldots,X_{\tau(n)}} (x_1,\ldots,x_n).
\]
We have the following characterization: the infinite sequence $X_1,X_2,\ldots,X_n,\ldots$ is exchangeable if and only if the representation
\[
P_{X_1,\ldots,X_n} \left[ \bigcap_{j=1}^n (X_j \in A_j)\right] = \int \left\{ \prod_{j=1}^n P_{X_j|T}[X_j \in A_j | T = t] \right\} dF_T(t)
\]
holds for some other random variable $T$ with distribution $F_T$. That is, the sequence is exchangeable if and only if elements in the sequence are conditionally independent given $T$, for some $T$ with distribution $F_T$. In fact, $T$ is a random variable formed as some function of $(X_1,\ldots,X_n)$ in the limiting case as $n \lra \infty$.

\medskip

The representation also indicates that we can construct exchangeable random variables by following the construction
\begin{align*}
T & \sim f_T(t) \\[6pt]
X_1,\ldots,X_n & \sim f_{X|T}(x|t) \qquad \textrm{independent}
\end{align*}

\medskip

\textbf{EXAMPLE:} Suppose $T \sim Uniform(0,1)$, and $X_1,\ldots,X_n|T=t \sim Bernoulli(t)$ independently. Then
\[
f_{X_1,\ldots,X_n}(x_1,\ldots,x_n) = \int_0^1 \prod_{j=1}^n f_{X_j|T}(x_j|t) f_T(t) \ dt = \int_0^1 t^s (1-t)^{n-s} \ dt = \frac{\Gamma(s+1) \Gamma(n-s+1)}{\Gamma(n+2)}
\]
where $s = \sum\limits_{j=1}^n x_j$, for $s = 0,1,\ldots,n$, and the support of the joint pmf is the set $\{0,1\}^n$ of binary vectors of length $n$. The integral is analytically tractable as the integrand is proportional to a $Beta(s+1,n-s+1)$ pdf. Note that in this construction, the quantity $s$ is associated with a corresponding random variable
\[
S = \sum_{j = 1}^n X_j
\]
which we can consider a \textit{summary statistic}, and notice that the event $S = s$ corresponds to
\[
\binom{n}{s}
\]
individual sequences of $x$ values which all have the same joint probability: this demonstrates exchangeability.
Thus
\[
f_S(s) = \binom{n}{s} \frac{\Gamma(s+1) \Gamma(n-s+1)}{\Gamma(n+2)} = \frac{n!}{s! (n-s)!} \frac{s! (n-s)!}{(n+1)!} = \frac{1}{n+1} \qquad s = 0,1,\ldots,n,
\]
and zero otherwise.

<<>>=
n<-10
s<-0:n
# Exact pmf of S evaluated at s = 0,...,n; every entry should equal 1/(n+1)
fs<-choose(n,s)*gamma(s+1)*gamma(n-s+1)/gamma(n+2)
fs
sum(fs)

# Sample the exchangeable binary variables.
#   nv : number of variables X_1,...,X_nv to draw.
# Draws a single T ~ Uniform(0,1), then nv independent Bernoulli(T) values,
# and returns them as a 0/1 vector of length nv.
sim.exch01<-function(nv){
  Tv<-runif(1)
  Xv<-rbinom(nv,1,Tv)
  return(Xv)
}

nreps<-10000                                  #number of replicate draws of S
svals<-replicate(nreps,sum(sim.exch01(n)))
table(svals)/nreps                            #empirical pmf of S
@

\medskip

\textbf{EXAMPLE:} Suppose $T \sim Normal(0,1)$, and $X_1,\ldots,X_n|T=t \sim Normal(t,1)$ independently. Then
\begin{align*}
f_{X_1,\ldots,X_n}(x_1,\ldots,x_n) & = \int_{-\infty}^\infty \prod_{j=1}^n f_{X_j|T}(x_j|t) f_T(t) \ dt \\[6pt]
& = \int_{-\infty}^\infty \prod_{j=1}^n \left\{ \left(\dfrac{1}{2 \pi}\right)^{1/2} \exp\left\{-\dfrac{1}{2} (x_j-t)^2 \right\} \right\} \left(\dfrac{1}{2 \pi}\right)^{1/2} \exp\left\{-\dfrac{1}{2} t^2 \right\} dt \\[6pt]
& = \left(\dfrac{1}{2 \pi}\right)^{(n+1)/2} \int_{-\infty}^\infty \exp\left\{-\dfrac{1}{2} \left[ \sum_{j=1}^n (x_j - t)^2 + t^2 \right] \right\}dt.
\end{align*}
Now, writing $\sum_{j=1}^n (x_j - t)^2 = \sum_{j=1}^n (x_j - \xbar)^2 + n (t - \xbar)^2$ and using the completing-the-square formula
\[
A(t-a)^2 + B(t-b)^2 = (A+B) \left( t - \frac{Aa+Bb}{A+B} \right)^2 + \frac{AB}{A+B} (a-b)^2
\]
with $A = n$, $a = \xbar$, $B = 1$ and $b = 0$, we have
\[
\sum_{j=1}^n (x_j - t)^2 + t^2 = \sum_{j=1}^n (x_j - \xbar)^2 + (n+1) \left(t - \frac{n \xbar}{n+1} \right)^2 + \frac{n}{n+1} \xbar^2
\]
so therefore, we have
\begin{align*}
\int_{-\infty}^\infty \exp\left\{-\dfrac{1}{2} \left[ \sum_{j=1}^n (x_j - t)^2 + t^2 \right] \right\}dt & = \exp\left\{ -\dfrac{1}{2} \left[\sum\limits_{j=1}^n (x_j - \xbar)^2 + \frac{n}{n+1} \xbar^2 \right] \right\} \int_{-\infty}^\infty \exp\left\{-\dfrac{(n+1)}{2} \left(t - \frac{n \xbar}{n+1} \right)^2 \right\} \ dt \\[6pt]
& = \exp\left\{ -\dfrac{1}{2} \left[\sum\limits_{j=1}^n (x_j - \xbar)^2 + \frac{n}{n+1} \xbar^2 \right] \right\} \sqrt{\dfrac{2 \pi}{n+1}}
\end{align*}
as the integrand is proportional to a Normal pdf.
Thus for $(x_1,\ldots,x_n) \in \R^n$,
\[
f_{X_1,\ldots,X_n}(x_1,\ldots,x_n) = \left(\dfrac{1}{2 \pi}\right)^{n/2} \sqrt{\dfrac{1}{n+1}} \exp\left\{ -\dfrac{1}{2} \left[\sum\limits_{j=1}^n (x_j - \xbar)^2 + \frac{n}{n+1} \xbar^2 \right] \right\}
\]
which also relies only upon the summary statistics
\[
S_1 = \xbar \qquad S_2 = \sum\limits_{j=1}^n (x_j - \xbar)^2
\]
and so we observe exchangeability.

<<>>=
n<-10

# Sample the exchangeable variables.
#   nv : number of variables X_1,...,X_nv to draw.
# Draws a single T ~ Normal(0,1), then nv independent Normal(T,1) values,
# and returns them as a numeric vector of length nv.
sim.exch02<-function(nv){
  Tv<-rnorm(1)
  Xv<-rnorm(nv,Tv,1)
  return(Xv)
}

#2000 replicate draws of (X_1,...,X_n); transposed so each row is one draw
Xmat<-t(replicate(2000,sim.exch02(n)))
dim(Xmat)
par(pty='s')
# Pairwise scatterplots, five variables at a time
pairs(Xmat[,1:5],pch=19,cex=0.5,
      labels=c(expression(X[1]),expression(X[2]),expression(X[3]),
               expression(X[4]),expression(X[5])))
pairs(Xmat[,6:10],pch=19,cex=0.5,
      labels=c(expression(X[6]),expression(X[7]),expression(X[8]),
               expression(X[9]),expression(X[10])))
@

We have that for $j=1,\ldots,n$,
\[
\E_{X_j}[X_j] = 0 \qquad \qquad \Var_{X_j}[X_j] = 2.
\]

<<>>=
# Column-wise sample means and variances (theoretical values: 0 and 2)
apply(Xmat,2,mean)
apply(Xmat,2,var)
@

Also, for the covariances, using iterated expectation we have for $j \neq k$
\[
\Cov_{X_j,X_k}[X_j,X_k] \equiv \E_{X_j,X_k}[X_j X_k] = \E_T \left[ \E_{X_j,X_k|T}[X_j X_k|T] \right] = \E_T \left[ \E_{X_j|T}[X_j|T] \E_{X_k|T}[X_k|T] \right]
\]
as $X_j$ and $X_k$ have expectation zero, and are conditionally independent given $T$. Thus, as $\E_{X_j|T}[X_j|T] = T$ for each $j$, we have
\[
\Cov_{X_j,X_k}[X_j,X_k] = \E_T [ T^2 ] = 1
\]
and hence, for $j \neq k$,
\[
\Corr_{X_j,X_k}[X_j,X_k] = \frac{\Cov_{X_j,X_k}[X_j,X_k]}{\sqrt{\Var_{X_j}[X_j]\Var_{X_k}[X_k]}} = \frac{1}{2}.
\]

<<>>=
# Sample correlation matrix (theoretical off-diagonal value: 0.5)
round(cor(Xmat),3)
@

\end{document}