\documentclass{beamer} \usepackage{verbatim} \usecolortheme{seagull} \usepackage{pstricks,pst-node,pst-tree,amscd,amsfonts,verbatim,geometry,bbm} \usepackage{fancyvrb,fancybox} \usepackage{color} \usepackage{xcolor} \usepackage{colortbl} \usepackage{graphicx} %\RequirePackage{etex} %\usepackage{etex} %\usepackage{xypic} \usepackage[all,color]{xy} \usepackage{natbib} \usepackage{tkz-graph} \usepackage{tikz} \usetikzlibrary{arrows,automata} \usetikzlibrary{graphs,arrows.meta} \usetikzlibrary{positioning} \usepackage{caption} \captionsetup[figure]{labelfont={it},textfont={color=black,it},labelformat={default},labelsep=period,name={}} \usepackage[latin1]{inputenc} \usepackage{undertilde} \usepackage{bm} \usepackage{amssymb} \usepackage{amsmath} %\usepackage{lastpage} \usepackage{rotating} \usepackage{multirow} \usepackage{graphicx} \usepackage{bbm} \usepackage{bbold} \usepackage{upgreek} \usepackage{fancybox} \usepackage{fancyhdr} \usepackage{amsfonts} \usepackage{dsfont} %\usepackage{arydshln} \usepackage{listings} \usepackage{courier} \lstloadlanguages{R} \usepackage[scaled=.85]{DejaVuSerifCondensed} \usepackage[T1]{fontenc} \usefonttheme{serif} \DeclareFontFamily{OT1}{pzc}{} \DeclareFontShape{OT1}{pzc}{m}{it}{<-> s * [1.30] pzcmi7t}{} \DeclareMathAlphabet{\mathpzc}{OT1}{pzc}{m}{it} %\theoremstyle{myNote} %\newtheorem{myNote}{Note} \usepackage{mathtools} \usepackage{bookmark} \usepackage{amsthm} \usepackage{ulem} \usepackage{soul} \usepackage{hanging} \setbeamertemplate{footnote}{\hangpara{2em}{1}\makebox[2em][l]{\insertfootnotemark}\footnotesize\insertfootnotetext\par} \addtobeamertemplate{footnote}{}{\vspace{5ex}} \hypersetup{% unicode =false, % non-Latin characters in Acrobat’s bookmarks pdftoolbar=true, % show Acrobat’s toolbar? pdfmenubar=true, % show Acrobat’s menu? %bookmarks=false, bookmarksopen=false } \usepackage{calc} \newsavebox\CBox \newcommand\hcancel[2][0.25pt]{% \ifmmode\sbox\CBox{$#2$}\else\sbox\CBox{#2}\fi% \makebox[0pt][l]{\usebox\CBox}% \rule[0.5\ht\CBox-#1/2]{\wd\CBox}{#1}} \newcommand{\msout}[1]{\text{\sout{\ensuremath{#1}}}} \makeatletter \newcommand*{\shifttext}[2]{% \settowidth{\@tempdima}{#2}% \makebox[\@tempdima]{\hspace*{#1}#2}% } \makeatother \newcommand\oldmyzeta[1]{{\textcolor{red}{\protect\mathpzc{z}_{#1}} \shifttext{-10pt}{\raisebox{0.1ex}{\tiny{\sim}}}}} \newcommand\firstmyzeta[1]{\textcolor{black}{{\protect\mathpzc{z}_{#1}}}} \newcommand\myzeta[1]{{\textcolor{mcgillred}{{\protect\textsf{z}_{#1}}}}} \newcommand\tildemyzeta[1]{\textcolor{mcgillred}{{\protect\widetilde{\textsf{z}}_{#1}}}} \theoremstyle{plain} \newtheorem{thm}{Theorem} % reset theorem numbering for each chapter \theoremstyle{definition} \newtheorem{defn}[thm]{Definition} % definition numbers are dependent on theorem numbers \newtheorem{Note}{Note} % same for example numbers \setbeamertemplate{theorems}[numbered] \def\U{\mathcal{U}} \def\u{\upsilon} \def\Ind{\mathds{1}} \def\transpose{\top} \def\Xvec{{\skew0\bm{X}}} \def\Xveca{{\skew0\bm{X}}_1} \def\Xvecb{{\skew0\bm{X}}_2} \def\Yvec{{\skew0\bm{Y}}} \def\xvec{{\skew0\bm{x}}} \def\xveca{{\skew0\bm{x}}_1} \def\xvecb{{\skew0\bm{x}}_2} \def\mvec{{\skew0\bm{m}}} \def\muvec{{\skew0\bm{\mu}}} \def\cvec{{\skew0\mathbf{c}}} \def\Xbar{\overline{X}} \def\xbar{\overline{x}} \def\ybar{\overline{y}} \def\yhat{\widehat{y}} \def\betahat{\widehat{\beta}} \def\betavec{\utilde{\beta}} \def\betahatvec{\skew3\widehat{\utilde{\beta}}} \def\sigmahat{\widehat{\sigma}} \def\ehat{\widehat{e}} \def\zhat{\widehat{z}} \def\nhat{\widehat{n}} \def\xbarA{\overline{x}_A} \def\xbarB{\overline{x}_B} \def\calR{\mathcal{R}} \def\calS{\mathcal{S}} \def\calE{{\mathcal{S}_{e}}} \def\calB{\mathcal{B}} \def\calU{\mathcal{U}} \def\calI{\mathcal{I}} \def\B{\mathcal{B}} \def\Qbb{\mathbb{Q}} \def\Pbb{\mathbb{P}} \def\E{\mathbb{E}} \def\Ehat{\widehat{\mathbbm{E}}} \def\Var{\mathbb{V}ar} \def\Cov{\mathbb{C}ov} \def\Corr{\mathbb{C}orr} \def\bhatint{\widehat{\beta}_0} \def\bhatsl{\widehat{\beta}_1} \def\d{\:\textrm{d}} \def\ps{{\textsf{e}}} \def\bs{{\textsf{b}}} \def\bX{\bm{X}} \def\Ident{\mathbf{I}} \def\One{\mathbf{1}} \def\Zero{\mathbf{0}} \def\bY{\mathbf{Y}} \newcommand\xbarBi[1]{\overline{x^{\text{\tiny{(B)}}}_{#1}}} \newcommand\xbarib[1]{\overline{x^{\text{\tiny{(B)}}}_{#1}}} \def\Fstat{\frac{(SSE_R - SSE_C)/(k-g)}{SSE_C/(n-k-1)}} \def\approxsim{\sim \! \! \! : \;} \def\hsp{\vspace{0.1 in}} \def\fsp{\vspace{0.2 in}} %\def\dfrac#1#2{{\displaystyle {#1 #2}}}% \def\dint{\mathop{\displaystyle \int}}% \def\dsum{\mathop{\displaystyle \sum}}% \newcommand\betaZ{$\beta_{1}$} \newcommand\betaA[1]{$\beta_{#1}^{(A)}$} \newcommand\betaB[1]{$\beta_{#1}^{(B)}$} \newcommand\gammaAB[1]{$\gamma_{#1}^{(AB)}$} \def\SST{\mathit{SST}} \def\MST{\mathit{MST}} \def\SSE{\mathit{SSE}} \def\MSE{\mathit{MSE}} \def\SSB{\mathit{SSB}} \def\MSB{\mathit{MSB}} \def\SSI{\mathit{SSI}} \def\MSI{\mathit{MSI}} \def\SS{\mathit{SS}} \def\Ztilde{{\widetilde Z}} \def\ztilde{{\widetilde z}} \def\bmu{\boldsymbol\mu} \newcounter{Lecture} \def\calA{\mathcal{A}} \renewcommand{\thelecture}{\arabic{lecture}} \newcommand\xbari[1]{\overline{x}_{#1}} \newcommand\subsectnum[1]{\textbf{\arabic{section}.\arabic{subsection}} \textbf{\uppercase{#1}}} \newcommand\textsclarge[1]{\fontshape{sc}\fontsize{14}{15} \selectfont{#1}} \renewcommand\emph[1]{\textcolor{mcgillred}{\textit{#1}}} \def\Exp{\mathcal{E}} \def\Ex{\mathcal{E}} \def\Exzero{\mathcal{E}_0} \def\Obs{\mathcal{O}} \def\Ob{\mathcal{O}} \usepackage{tikz} \newcommand*\circled[1]{\tikz[baseline=(char.base)]{ \node[shape=circle,draw,inner sep=1pt] (char) {#1};}} \def\WideSpacing{\itemsep=2.0ex\topsep=0.5ex\partopsep=0.0ex\parskip=0.0ex\parsep=0.0ex} \makeatletter \let\orig@Enumerate =\enumerate \newenvironment{enumerateWide}{\orig@Enumerate\WideSpacing}{\endlist} \makeatother \definecolor{midgray}{rgb}{0.75,0.75,0.75} \definecolor{lightgray}{rgb}{0.85,0.85,0.85} \definecolor{darkgray}{rgb}{0.654,0.724,0.795} \definecolor{lightdarkgray}{rgb}{0.727,0.805,0.883} \definecolor{purpleblue}{rgb}{0.50,0.50,1.00} \definecolor{lightblue}{rgb}{0.90,0.90,1.00} \definecolor{kugreen}{RGB}{50,93,61} \definecolor{kugreenlys}{RGB}{132,158,139} \definecolor{kugreenlyslys}{RGB}{173,190,177} \definecolor{kugreenlyslyslys}{RGB}{214,223,216} \definecolor{mcgillred}{rgb}{.926,0.105,0.184} \definecolor{mcgillreddark}{rgb}{0.617,0.035,0.094} \definecolor{mcgillredlight}{rgb}{0.926,0.153,0.241} \definecolor{verylightred}{rgb}{1,0.95,0.95} \definecolor{darkgreen}{rgb}{0.008, 0.294, 0.188} \definecolor{lightgreen}{rgb}{0.75,1.0,0.75} \definecolor{verylightgreen}{rgb}{0.90,1.0,0.90} \definecolor{darkblue}{rgb}{0.00, 0.00, 0.444} \definecolor{lightblue}{rgb}{0.933,0.933,1.00} \definecolor{psyc}{rgb}{0.87, 0.0, 1.0} \definecolor{mayablue}{rgb}{0.45, 0.76, 0.98} \definecolor{paleblue}{rgb}{0.69, 0.93, 0.93} \definecolor{spirodiscoball}{rgb}{0.06, 0.75, 0.99} \definecolor{lightcyan}{rgb}{0.88, 1.0, 1.0} \definecolor{frenchblue}{rgb}{0.0, 0.45, 0.73} \definecolor{bubbles}{rgb}{0.91, 1.0, 1.0} \definecolor{azurewm}{rgb}{0.94, 1.0, 1.0} \definecolor{airforceblue}{rgb}{0.36, 0.54, 0.66} \definecolor{darkmidnightblue}{rgb}{0.0, 0.2, 0.4} \definecolor{darkpastelblue}{rgb}{0.47, 0.62, 0.8} \definecolor{bleudefrance}{rgb}{0.19, 0.55, 0.91} \definecolor{babyblue}{rgb}{0.54, 0.81, 0.94} \setbeamercolor{background}{bg=red!15!white} \setbeamercovered{transparent} \setbeamercolor{title in sidebar}{fg=lightgray} \setbeamercolor{sidebar}{bg=lightgray,fg=purpleblue} \setbeamercolor{author in sidebar}{fg=lightgray} \setbeamercolor{section in sidebar}{fg=white,bg=mcgillred} \setbeamercolor{title}{fg=white,bg=mcgillred} \setbeamercolor{author}{use=structure,fg=black,bg=darkgray} \setbeamercolor{part name}{fg=black,bg=blue!30!white} \setbeamercolor{institute}{use=structure,fg=black,bg=lightdarkgray} \setbeamercolor{frametitle}{fg=white,bg=mcgillred} \setbeamercolor{subsection in sidebar}{fg=purpleblue} \setbeamercolor{block title}{use=structure,fg=black,bg=blue!30!white} \setbeamercolor{block body}{use=structure,fg=black,bg=blue!20!white} \setbeamertemplate{navigation symbols}{} \setbeamercovered{transparent} \mode \setbeamertemplate{part page} { \begin{centering} %\begin{beamercolorbox}[sep=16pt,center]{author} % \usebeamerfont{part title}\partname~\insertpartnumber \par %\end{beamercolorbox} \vskip1em\par \begin{beamercolorbox}[sep=16pt,center]{institute} \usebeamerfont{part title} \insertpart\par \end{beamercolorbox} \end{centering} } \makeatletter \AtBeginPart{% \beamer@tocsectionnumber=0\relax \setcounter{section}{0} \numberwithin{section}{part} {% start a group to keep the template change local \setbeamertemplate{background canvas}{% \color{white}\rule{\paperwidth}{\paperheight}% }% \setbeamercolor{frametitle}{fg=white,bg=white} \setbeamertemplate{footline}{ \leavevmode% \hbox{\begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}% \usebeamerfont{author in head/foot} \end{beamercolorbox}% \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}% \usebeamerfont{title in head/foot}\hfill\insertpagenumber \end{beamercolorbox}}% \vskip0pt% } %\addtocounter{framenumber}{-1} \begin{frame}[noframenumbering,c]{\partpage}\end{frame}% }% end group } \makeatother \setbeamercolor{author in head/foot}{bg=white, fg=mcgillred} \setbeamercolor{title in head/foot}{bg=white, fg=mcgillred} \setbeamertemplate{footline} {% %\rule{\paperwidth}{0.025cm} \leavevmode% \hbox{\begin{beamercolorbox}[left, wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm]{title in head/foot}% \usebeamerfont{author in head/foot}\insertpartnumber.\insertsectionnumber: \insertsection \end{beamercolorbox}% \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}% \usebeamerfont{title in head/foot}\hfill\insertpagenumber \end{beamercolorbox}}% \vskip0pt% } %\usecolortheme[named=kugreen]{structure} %\useinnertheme{circles} %\setbeamercovered{transparent} %\setbeamertemplate{blocks}[rounded][shadow=true] \setbeamertemplate{blocks}[default] \setbeamerfont{itemize/enumerate subbody}{size=\footnotesize} \setbeamertemplate{itemize items}[circle] \newenvironment<>{Ex}[1]{% \begin{actionenv}#2% \def\insertblocktitle{Example: #1}% \par% \mode{% \setbeamercolor{block title}{use=structure,fg=darkmidnightblue,bg=babyblue} \setbeamercolor{block body}{use=structure,fg=black,bg=bubbles} \setbeamertemplate{itemize item}{\scriptsize\raise1.25pt\hbox{\donotcoloroutermaths$\bullet$}} }% \usebeamertemplate{block begin}\justifying} {\par\usebeamertemplate{block end}\end{actionenv}} \newenvironment<>{Thm}[1]{% \begin{actionenv}#2% \def\insertblocktitle{Theorem: #1}% \par% \mode{% \setbeamercolor{block title}{use=structure,fg=white,bg=darkgreen} \setbeamercolor{block body}{use=structure,fg=black,bg=lightgreen} \setbeamercolor{itemize item}{fg=orange!20!black} \setbeamertemplate{itemize item}[circle] }% \usebeamertemplate{block begin}\justifying} {\par\usebeamertemplate{block end}\end{actionenv}} \newenvironment<>{Nt}[1]{% \begin{actionenv}#2% \def\insertblocktitle{Note}% \par% \mode{% \setbeamercolor{block title}{use=structure,fg=white,bg=darkblue} \setbeamercolor{block body}{use=structure,fg=black,bg=azurewm} \setbeamercolor{itemize item}{fg=orange!20!black} \setbeamertemplate{itemize item}[circle] }% \usebeamertemplate{block begin}\justifying} {\par\usebeamertemplate{block end}\end{actionenv}} \newcounter{myNote} \newcommand{\myNote}{\arabic{myNote}} \setcounter{myNote}{1} \def\R{\mathbb{R}} \renewcommand*{\thefootnote}{[\arabic{footnote}]} \renewenvironment<>{Note}{% \begin{actionenv}% \def\insertblocktitle{Note \myNote.}% \par% \mode{% \setbeamercolor{block title}{use=structure,fg=white,bg=darkblue} \setbeamercolor{block body}{use=structure,fg=black,bg=lightblue} \setbeamercolor{itemize item}{fg=orange!20!black} \setbeamertemplate{itemize item}[circle] }% \usebeamertemplate{block begin}\addtocounter{myNote}{1}\justifying} {\par\usebeamertemplate{block end}\end{actionenv}} \date{ } % % The following info should normally be given in you main file: % \usepackage{tikz} \newcommand{\topline}{% \tikz[remember picture,overlay] {% \draw[midgray] ([yshift=-1cm]current page.north west) -- ([yshift=-1cm,xshift=\paperwidth]current page.north west);}} \setbeamertemplate{frametitle}{% \nointerlineskip \begin{beamercolorbox}[sep=0.3cm,ht=1.8em,wd=\paperwidth,rightskip=0cm]{frametitle}% \usebeamerfont{frametitle}\usebeamercolor[fg]{frametitle} \vbox{}\vskip-2ex% \strut\insertframetitle\strut \vskip-1.2ex% \end{beamercolorbox}% } \newcommand\mypart[1]{#1} %\setbeamertemplate{frametitle continuation}[from %second][(\lowercase{\insertcontinuationcountroman})] \setbeamertemplate{frametitle continuation}[from second][] \setbeamertemplate{section in toc}{\insertpartnumber.\inserttocsectionnumber\hspace*{1em}\inserttocsection} \title{Propensity Score Methods, Models and Adjustment} \author{Dr David A. Stephens} \institute{ Department of Mathematics \& Statistics\\ McGill University\\ Montreal, QC, Canada. \vspace{0.1 in} \texttt{david.stephens@mcgill.ca}\\ \texttt{\texttt{www.math.mcgill.ca/dstephens/SISCER2020/}} } \titlegraphic{\includegraphics[width=1.93cm,height=2cm]{McGillLogo}} \setbeamertemplate{itemize item}{\scriptsize\raise1.25pt\hbox{\donotcoloroutermaths$\bullet$}} \setbeamertemplate{itemize subitem}{\tiny\raise1.25pt\hbox{\donotcoloroutermaths$\blacktriangleright$}} \usepackage{etoolbox} %\usepackage{setspace} %\addtocontents{toc}{\protect\setstretch{0.9}} \usepackage{ragged2e} \apptocmd{\frame}{}{\justifying}{} % Allow optional arguments after frame. %\usepackage[bookmarks=true]{hyperref} \lstset{basicstyle=\ttfamily\tiny, numbers=left, numberstyle=\tiny, stepnumber=1, numbersep=5pt} \def\ftilde{\widetilde{f}} \begin{document} %\part{NHANES Example} \setbeamercolor{author in head/foot}{bg=white, fg=mcgillred} \setbeamercolor{title in head/foot}{bg=white, fg=mcgillred} { \setbeamertemplate{footline} {% \leavevmode% \hbox{\begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}% \usebeamerfont{author in head/foot} \end{beamercolorbox}% \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}% \usebeamerfont{title in head/foot}\hfill\insertpagenumber \end{beamercolorbox}}% \vskip0pt% } <>= library(knitr) # global chunk options opts_chunk$set(cache=TRUE, autodep=TRUE, size = "scriptsize") options(scipen=5) options(repos=c(CRAN="https://cloud.r-project.org/")) @ %\frame{\titlepage} \begin{frame}[fragile,allowframebreaks]\frametitle{The Dirichlet Process and Clustering} The Dirichlet Process is a model for a \emph{random discrete distribution} represented by pmf $\ftilde$. \medskip Suppose we want to sample \begin{itemize} \item $\ftilde \sim DP(\alpha,G_X)$, then \item $Y_1,\ldots,Y_n \sim \ftilde$ independently \end{itemize} and study the \emph{marginal distribution} for $Y_1,\ldots,Y_n$ \[ f_{Y_1,\ldots,Y_n}(y_1,\ldots,y_n) = \int \prod_{i=1}^n \ftilde(y_i) \pi_0(d \ftilde) \] where $\pi_0(d \ftilde) \equiv DP(\alpha,G_X)$. \medskip The marginal distribution is merely the \emph{prior predictive distribution} under the nonparametric model. \framebreak To sample from the marginal distribution \emph{directly} using the \emph{Polya Urn}: \begin{itemize} \justifying \item Sample $Y_1 \sim G_X$ \item For $i=2,\ldots,n$, sample $Y_i|Y_1,\ldots,Y_{i-1}$ from the conditional distribution, which is a mixture of the uniform \emph{discrete} distribution on $\{y_1,\ldots,y_{i-1}\}$ and $G_X$, with weights \[ \frac{i-1}{\alpha+i-1} \quad \text{and} \quad \frac{\alpha}{\alpha+i-1} \] respectively. That is, for $j=1,\ldots,i-1$, \[ \Pr[Y_i = y_j|Y_1=y_1,\ldots,Y_{i-1} = y_{i-1}] = \frac{1}{\alpha+i-1} \qquad \] and with the remaining probability $\alpha/(\alpha+i-1)$, $Y_i \sim G_X$. \end{itemize} \framebreak <>= polya.urn<-function(nv,alv,muv,sigv){ y<-rep(0,nv) y[1]<-rnorm(1,muv,sigv) u<-runif(nv) for(i in 2:nv){ if(u[i] < alv/(alv+i-1)){ y[i]<-rnorm(1,muv,sigv) }else{ y[i]<-sample(y[1:(i-1)],size=1) } } return(y) } set.seed(3) polya.urn(5,alv=2,0,1) @ \framebreak <>= n<-20 al<-2 nreps<-10000 Y.samp<-replicate(nreps,polya.urn(n,2,0,1)) library(dplyr,warn.conflicts = FALSE) K.samp<-apply(Y.samp,2,n_distinct) table(K.samp) #Counts of numbers of distinct Y values prob.K.samp<-tabulate(K.samp,n)/nreps #Proportions @ \framebreak Exact calculation: \[ \Pr[K = k] = \frac{\alpha^k B(n,k)}{\sum\limits_{j=1}^n \alpha^j B(n,j)} \] where $B(n,k)$ is an (unsigned) Stirling Number of the First Kind. <>= library(gmp) #For the Stirling numbers Stirling1.all(n) @ \framebreak <>= S.nk<-abs(as.numeric(Stirling1.all(n))) lprob.vec<-c(1:n)*log(al)+log(S.nk) prob.vec<-exp(lprob.vec-max(lprob.vec)) prob.vec<-prob.vec/sum(prob.vec) round(prob.vec,6) prob.compare<-cbind(prob.vec,prob.K.samp) row.names(prob.compare)<-paste('K =',1:n) colnames(prob.compare)<-c('True','Sample') @ \framebreak <>= prob.compare[1:10,] @ \framebreak We can sample the cluster membership \emph{directly} in the Polya Urn <>= polya.urn.labels<-function(nv,alv,muv,sigv){ z<-rep(0,nv) z[1]<-1 u<-runif(nv) Kco<-1 for(i in 2:nv){ if(u[i] < alv/(alv+i-1)){ Kco<-Kco+1 } z[i]<-Kco } return(z) } set.seed(3) polya.urn.labels(5,alv=2,0,1) @ <>= K.samp.labels<-replicate(nreps,polya.urn.labels(n,2,0,1))[,n] prob.K.samp.labels<-tabulate(K.samp,n)/nreps #Proportions prob.compare<-cbind(prob.vec,prob.K.samp,prob.K.samp.labels) row.names(prob.compare)<-paste('K =',1:n) colnames(prob.compare)<-c('True','Sample','Labels') prob.compare[1:10,] @ In both approaches we can sample the \emph{observables} $Y$ \emph{without} having to sample the components of $\ftilde$. \framebreak In the \emph{posterior distribution} we have $\alpha^\star = \alpha + n$, for any set $B$ \[ G_X^\star(B) = \frac{\alpha}{\alpha+n} G_X(B) + \frac{1}{\alpha+n} \sum_{i=1}^n \delta_{y_i}(B). \] and then \[ \pi_n(d \ftilde) \equiv DP(\alpha^\star,G_X^\star). \] \framebreak \emph{Posterior predictive:} To obtain the posterior predictive, we compute in the usual way \[ f_{Y_{n+1},\ldots,Y_{n+m}|Y_1,\ldots,Y_n}(y_{n+1},\ldots,y_{n+m}|y_1,\ldots,y_n) = \int \prod_{i=n+1}^{n+m} \ftilde(y_i) \pi_n(d \ftilde) \] \framebreak To obtain the sample $\textcolor{red}{Y_{n+1}},\ldots,\textcolor{red}{Y_{n+m}}$ from the posterior predictive we may use \emph{direct} simulation \begin{itemize} \item sample $\ftilde \sim \pi_n(d \ftilde)$ \item sample $\textcolor{red}{Y_{n+1}},\ldots,\textcolor{red}{Y_{n+m}}$ independently from $\ftilde$ \end{itemize} \framebreak We may also use the \emph{Polya Urn} \begin{itemize} \justifying \item Sample $\textcolor{red}{Y_{n+1}} \sim G_X^\star$, that is \[ \textcolor{red}{Y_{n+1}} \sim \left\{ \begin{array}{cl} G_X & \textrm{w.p. } \alpha/(\alpha+n) \\[6pt] \delta_{y_i}(y) & \textrm{w.p. } 1/(\alpha+n), \ i=1,\ldots,n. \end{array} \right. \] \item For $i=2,\ldots,m$, sample \[ \textcolor{red}{Y_{n+i}} \sim \left\{ \begin{array}{cl} G_X^\star & \textrm{w.p. } \alpha^\star/(\alpha^\star + i -1 ) \\[6pt] \delta_{y_{n+j}}(y) & \textrm{w.p. } 1/(\alpha^\star+i-1), \ j=1,\ldots,i-1. \end{array} \right. \] \framebreak That is, \[ \textcolor{red}{Y_{n+i}} \sim \left\{ \begin{array}{cl} G_X & \textrm{w.p. } \alpha/(\alpha + n + i -1 ) \\[6pt] \delta_{y_{j}}(y) & \textrm{w.p. } 1/(\alpha+n+i-1), \ j=1,\ldots,n+i-1. \end{array} \right. \] This sequential generation treats the \emph{all} samples obtained \emph{before} $n+i$, \[ y_1,\ldots,y_{n-1},y_{n},\textcolor{red}{y_{n+1}},\ldots,\textcolor{red}{y_{n+i-1}} \] on an equal basis. \end{itemize} \framebreak In the limiting case of $\alpha \longrightarrow 0$, we have that $\alpha^\star = n$ and \[ G_X^\star(B) = \frac{1}{n} \sum_{i=1}^n \delta_{y_i}(B). \] The limiting posterior distribution places random probabilities \[ \pi \sim Dirichlet(n-1;1,\ldots,1) \] on the observed data $\{y_1,\ldots,y_n\}$. \framebreak Sample the limiting posterior using stick-breaking: <>= limit.DP<-function(nv,Yv,Mv=10000){ V<-rbeta(Mv,1,nv) pivec<-c(V[1],cumprod(1-V[-Mv])*V[-1]) Xvec<-sample(Yv,size=Mv,rep=T) ptot<-rep(0,nv) for(i in 1:nv){ ptot[i]<-sum(pivec[Xvec==Yv[i]]) } return(ptot) } nreps<-10000 n<-10 Y0<-runif(n) pimat<-replicate(nreps,limit.DP(n,Y0)) @ \framebreak Sampled probabilities on $y_1$ are distributed as $Beta(1,n-1)$. <>= hist(pimat[1,],freq=FALSE,br=seq(0,1,by=0.01), main='',xlab=expression(pi[1]));box() xv<-seq(0,1,by=0.001);yv<-dbeta(xv,1,n-1) lines(xv,yv,col='red') legend(0.6,8,c('Beta(1,n-1)'),lty=1,col='red') @ \framebreak Posterior predictive: Direct sampling <>= library(extraDistr) n<-10 m<-1000 Zsamp<-matrix(0,nrow=nreps,ncol=n) for(i in 1:nreps){ pi.vec<-rdirichlet(1,rep(1,n)) Z<-sample(1:n,size=m,prob=pi.vec,rep=T) Zsamp[i,]<-tabulate(Z,nbins=n) } @ Posterior predictive: Polya Urn <>= set.seed(2413) Zsamp0<-matrix(0,nrow=nreps,ncol=n) for(i in 1:nreps){ Z<-1:n for(j in 1:m){Z<-c(Z,sample(Z,size=1))} Zsamp0[i,]<-tabulate(Z,nbins=n)-1 } @ \framebreak <>= all.Z<-cbind(Zsamp[,1],Zsamp0[,1]) for(j in 2:n){all.Z<-cbind(all.Z,Zsamp[,j],Zsamp0[,j])} par(mar=c(5,4,3,6),xpd=TRUE,xaxt='n') avec<-1:(3*n);avec<-avec[-3*(1:n)] boxplot(all.Z,pch=19,cex=0.5,col=c('red','blue'),at=avec) legend("topright", c("Direct", "Polya Urn"),cex=0.75, fill = c("red", "blue"),inset=c(-0.175,0)) nvec<-c(expression(y[1]),expression(y[2]), expression(y[3]),expression(y[4]), expression(y[5]),expression(y[6]), expression(y[7]),expression(y[8]), expression(y[9]),expression(y[10])) text(1.5+c(0:(n-1))*3,rep(-100,10),nvec) title('Number of times each y is sampled (n=10)') @ \framebreak Contrast this with ordinary \emph{bootstrap} resampling, which resamples $y$ values with \emph{deterministic} probabilities \[ \left\{ \frac{1}{n},\ldots \frac{1}{n} \right\} \] <>= set.seed(2413) ZsampB<-matrix(0,nrow=nreps,ncol=n) for(i in 1:nreps){ Z<-sample(1:n,size=m,rep=T) ZsampB[i,]<-tabulate(Z,nbins=n) } @ \framebreak <>= all.Z<-cbind(Zsamp[,1],Zsamp0[,1],ZsampB[,1]) for(j in 2:n){all.Z<-cbind(all.Z,Zsamp[,j],Zsamp0[,j],ZsampB[,j])} par(mar=c(5,4,3,6),xpd=TRUE,xaxt='n') avec<-1:(4*n);avec<-avec[-4*(1:n)] boxplot(all.Z,pch=19,cex=0.5,col=c('red','blue','green'),at=avec) legend("topright", c("Direct", "Polya Urn",'Bootstrap'),cex=0.75, fill = c("red", "blue","green"),inset=c(-0.175,0)) nvec<-c(expression(y[1]),expression(y[2]), expression(y[3]),expression(y[4]), expression(y[5]),expression(y[6]), expression(y[7]),expression(y[8]), expression(y[9]),expression(y[10])) text(2+c(0:(n-1))*4,rep(-50,10),nvec) title('Number of times each y is sampled (n=10)') abline(m/n,0,col='red',lty=2) @ \framebreak <>= n<-1000 m<-10000 Zsamp<-matrix(0,nrow=nreps,ncol=n) for(i in 1:nreps){ pi.vec<-rdirichlet(1,rep(1,n)) Z<-sample(1:n,size=m,prob=pi.vec,rep=T) Zsamp[i,]<-tabulate(Z,nbins=n) } Zsamp0<-matrix(0,nrow=nreps,ncol=n) for(i in 1:nreps){ Z<-1:n for(j in 1:m){Z<-c(Z,sample(Z,size=1))} Zsamp0[i,]<-tabulate(Z,nbins=n)-1 } ZsampB<-matrix(0,nrow=nreps,ncol=n) for(i in 1:nreps){ Z<-sample(1:n,size=m,rep=T) ZsampB[i,]<-tabulate(Z,nbins=n) } @ \framebreak <>= all.Z<-cbind(Zsamp[,1],Zsamp0[,1],ZsampB[,1]) nsub<-10 for(j in 2:nsub){all.Z<-cbind(all.Z,Zsamp[,j],Zsamp0[,j],ZsampB[,j])} par(mar=c(5,4,3,6),xpd=TRUE,xaxt='n') avec<-1:(4*nsub);avec<-avec[-4*(1:nsub)] boxplot(all.Z,pch=19,cex=0.5,col=c('red','blue','green'),at=avec) legend("topright", c("Direct", "Polya Urn",'Bootstrap'),cex=0.75, fill = c("red", "blue","green"),inset=c(-0.175,0)) nvec<-c(expression(y[1]),expression(y[2]), expression(y[3]),expression(y[4]), expression(y[5]),expression(y[6]), expression(y[7]),expression(y[8]), expression(y[9]),expression(y[10])) text(2+c(0:(nsub-1))*4,rep(-10,10),nvec) title('Number of times each y is sampled (n=1000)') abline(m/n,0,col='red',lty=2) @ \framebreak The Bayesian nonparametric formulations always yield \emph{more variable} samples, although the difference gets less marked as $n$ increases. \end{frame} \end{document}