\documentclass{beamer} \usepackage{verbatim} \usecolortheme{seagull} \usepackage{pstricks,pst-node,pst-tree,amscd,amsfonts,verbatim,geometry,bbm} \usepackage{fancyvrb,fancybox} \usepackage{color} \usepackage{xcolor} \usepackage{colortbl} \usepackage{graphicx} %\RequirePackage{etex} %\usepackage{etex} %\usepackage{xypic} \usepackage[all,color]{xy} \usepackage{natbib} \usepackage{tkz-graph} \usepackage{tikz} \usetikzlibrary{arrows,automata} \usetikzlibrary{graphs,arrows.meta} \usetikzlibrary{positioning} \usepackage{caption} \captionsetup[figure]{labelfont={it},textfont={color=black,it},labelformat={default},labelsep=period,name={}} \usepackage[latin1]{inputenc} \usepackage{undertilde} \usepackage{bm} \usepackage{amssymb} \usepackage{amsmath} %\usepackage{lastpage} \usepackage{rotating} \usepackage{multirow} \usepackage{graphicx} \usepackage{bbm} \usepackage{bbold} \usepackage{upgreek} \usepackage{fancybox} \usepackage{fancyhdr} \usepackage{amsfonts} \usepackage{dsfont} %\usepackage{arydshln} \usepackage{listings} \usepackage{courier} \lstloadlanguages{R} \usepackage[scaled=.85]{DejaVuSerifCondensed} \usepackage[T1]{fontenc} \usefonttheme{serif} \DeclareFontFamily{OT1}{pzc}{} \DeclareFontShape{OT1}{pzc}{m}{it}{<-> s * [1.30] pzcmi7t}{} \DeclareMathAlphabet{\mathpzc}{OT1}{pzc}{m}{it} %\theoremstyle{myNote} %\newtheorem{myNote}{Note} \usepackage{mathtools} \usepackage{bookmark} \usepackage{amsthm} \usepackage{ulem} \usepackage{soul} \usepackage{hanging} \setbeamertemplate{footnote}{\hangpara{2em}{1}\makebox[2em][l]{\insertfootnotemark}\footnotesize\insertfootnotetext\par} \addtobeamertemplate{footnote}{}{\vspace{5ex}} \hypersetup{% unicode =false, % non-Latin characters in Acrobat’s bookmarks pdftoolbar=true, % show Acrobat’s toolbar? pdfmenubar=true, % show Acrobat’s menu? %bookmarks=false, bookmarksopen=false } \usepackage{calc} \newsavebox\CBox \newcommand\hcancel[2][0.25pt]{% \ifmmode\sbox\CBox{$#2$}\else\sbox\CBox{#2}\fi% \makebox[0pt][l]{\usebox\CBox}% \rule[0.5\ht\CBox-#1/2]{\wd\CBox}{#1}} \newcommand{\msout}[1]{\text{\sout{\ensuremath{#1}}}} \makeatletter \newcommand*{\shifttext}[2]{% \settowidth{\@tempdima}{#2}% \makebox[\@tempdima]{\hspace*{#1}#2}% } \makeatother \newcommand\oldmyzeta[1]{{\textcolor{red}{\protect\mathpzc{z}_{#1}} \shifttext{-10pt}{\raisebox{0.1ex}{\tiny{\sim}}}}} \newcommand\firstmyzeta[1]{\textcolor{black}{{\protect\mathpzc{z}_{#1}}}} \newcommand\myzeta[1]{{\textcolor{mcgillred}{{\protect\textsf{z}_{#1}}}}} \newcommand\tildemyzeta[1]{\textcolor{mcgillred}{{\protect\widetilde{\textsf{z}}_{#1}}}} \theoremstyle{plain} \newtheorem{thm}{Theorem} % reset theorem numbering for each chapter \theoremstyle{definition} \newtheorem{defn}[thm]{Definition} % definition numbers are dependent on theorem numbers \newtheorem{Note}{Note} % same for example numbers \setbeamertemplate{theorems}[numbered] \def\U{\mathcal{U}} \def\u{\upsilon} \def\Ind{\mathds{1}} \def\transpose{\top} \def\Xvec{{\skew0\bm{X}}} \def\Xveca{{\skew0\bm{X}}_1} \def\Xvecb{{\skew0\bm{X}}_2} \def\Yvec{{\skew0\bm{Y}}} \def\xvec{{\skew0\bm{x}}} \def\xveca{{\skew0\bm{x}}_1} \def\xvecb{{\skew0\bm{x}}_2} \def\mvec{{\skew0\bm{m}}} \def\muvec{{\skew0\bm{\mu}}} \def\cvec{{\skew0\mathbf{c}}} \def\Xbar{\overline{X}} \def\xbar{\overline{x}} \def\ybar{\overline{y}} \def\yhat{\widehat{y}} \def\betahat{\widehat{\beta}} \def\betavec{\utilde{\beta}} \def\betahatvec{\skew3\widehat{\utilde{\beta}}} \def\sigmahat{\widehat{\sigma}} \def\ehat{\widehat{e}} \def\zhat{\widehat{z}} \def\nhat{\widehat{n}} \def\xbarA{\overline{x}_A} \def\xbarB{\overline{x}_B} \def\calR{\mathcal{R}} \def\calS{\mathcal{S}} \def\calE{{\mathcal{S}_{e}}} \def\calB{\mathcal{B}} \def\calU{\mathcal{U}} \def\calI{\mathcal{I}} \def\B{\mathcal{B}} \def\Qbb{\mathbb{Q}} \def\Pbb{\mathbb{P}} \def\E{\mathbb{E}} \def\Ehat{\widehat{\mathbbm{E}}} \def\Var{\mathbb{V}ar} \def\Cov{\mathbb{C}ov} \def\Corr{\mathbb{C}orr} \def\bhatint{\widehat{\beta}_0} \def\bhatsl{\widehat{\beta}_1} \def\d{\:\textrm{d}} \def\ps{{\textsf{e}}} \def\bs{{\textsf{b}}} \def\bX{\bm{X}} \def\Ident{\mathbf{I}} \def\One{\mathbf{1}} \def\Zero{\mathbf{0}} \def\bY{\mathbf{Y}} \newcommand\xbarBi[1]{\overline{x^{\text{\tiny{(B)}}}_{#1}}} \newcommand\xbarib[1]{\overline{x^{\text{\tiny{(B)}}}_{#1}}} \def\Fstat{\frac{(SSE_R - SSE_C)/(k-g)}{SSE_C/(n-k-1)}} \def\approxsim{\sim \! \! \! : \;} \def\hsp{\vspace{0.1 in}} \def\fsp{\vspace{0.2 in}} %\def\dfrac#1#2{{\displaystyle {#1 #2}}}% \def\dint{\mathop{\displaystyle \int}}% \def\dsum{\mathop{\displaystyle \sum}}% \newcommand\betaZ{$\beta_{1}$} \newcommand\betaA[1]{$\beta_{#1}^{(A)}$} \newcommand\betaB[1]{$\beta_{#1}^{(B)}$} \newcommand\gammaAB[1]{$\gamma_{#1}^{(AB)}$} \def\SST{\mathit{SST}} \def\MST{\mathit{MST}} \def\SSE{\mathit{SSE}} \def\MSE{\mathit{MSE}} \def\SSB{\mathit{SSB}} \def\MSB{\mathit{MSB}} \def\SSI{\mathit{SSI}} \def\MSI{\mathit{MSI}} \def\SS{\mathit{SS}} \def\Ztilde{{\widetilde Z}} \def\ztilde{{\widetilde z}} \def\bmu{\boldsymbol\mu} \newcounter{Lecture} \def\calA{\mathcal{A}} \renewcommand{\thelecture}{\arabic{lecture}} \newcommand\xbari[1]{\overline{x}_{#1}} \newcommand\subsectnum[1]{\textbf{\arabic{section}.\arabic{subsection}} \textbf{\uppercase{#1}}} \newcommand\textsclarge[1]{\fontshape{sc}\fontsize{14}{15} \selectfont{#1}} \renewcommand\emph[1]{\textcolor{mcgillred}{\textit{#1}}} \def\Exp{\mathcal{E}} \def\Ex{\mathcal{E}} \def\Exzero{\mathcal{E}_0} \def\Obs{\mathcal{O}} \def\Ob{\mathcal{O}} \usepackage{tikz} \newcommand*\circled[1]{\tikz[baseline=(char.base)]{ \node[shape=circle,draw,inner sep=1pt] (char) {#1};}} \def\WideSpacing{\itemsep=2.0ex\topsep=0.5ex\partopsep=0.0ex\parskip=0.0ex\parsep=0.0ex} \makeatletter \let\orig@Enumerate =\enumerate \newenvironment{enumerateWide}{\orig@Enumerate\WideSpacing}{\endlist} \makeatother \definecolor{midgray}{rgb}{0.75,0.75,0.75} \definecolor{lightgray}{rgb}{0.85,0.85,0.85} \definecolor{darkgray}{rgb}{0.654,0.724,0.795} \definecolor{lightdarkgray}{rgb}{0.727,0.805,0.883} \definecolor{purpleblue}{rgb}{0.50,0.50,1.00} \definecolor{lightblue}{rgb}{0.90,0.90,1.00} \definecolor{kugreen}{RGB}{50,93,61} \definecolor{kugreenlys}{RGB}{132,158,139} \definecolor{kugreenlyslys}{RGB}{173,190,177} \definecolor{kugreenlyslyslys}{RGB}{214,223,216} \definecolor{mcgillred}{rgb}{.926,0.105,0.184} \definecolor{mcgillreddark}{rgb}{0.617,0.035,0.094} \definecolor{mcgillredlight}{rgb}{0.926,0.153,0.241} \definecolor{verylightred}{rgb}{1,0.95,0.95} \definecolor{darkgreen}{rgb}{0.008, 0.294, 0.188} \definecolor{lightgreen}{rgb}{0.75,1.0,0.75} \definecolor{verylightgreen}{rgb}{0.90,1.0,0.90} \definecolor{darkblue}{rgb}{0.00, 0.00, 0.444} \definecolor{lightblue}{rgb}{0.933,0.933,1.00} \definecolor{psyc}{rgb}{0.87, 0.0, 1.0} \definecolor{mayablue}{rgb}{0.45, 0.76, 0.98} \definecolor{paleblue}{rgb}{0.69, 0.93, 0.93} \definecolor{spirodiscoball}{rgb}{0.06, 0.75, 0.99} \definecolor{lightcyan}{rgb}{0.88, 1.0, 1.0} \definecolor{frenchblue}{rgb}{0.0, 0.45, 0.73} \definecolor{bubbles}{rgb}{0.91, 1.0, 1.0} \definecolor{azurewm}{rgb}{0.94, 1.0, 1.0} \definecolor{airforceblue}{rgb}{0.36, 0.54, 0.66} \definecolor{darkmidnightblue}{rgb}{0.0, 0.2, 0.4} \definecolor{darkpastelblue}{rgb}{0.47, 0.62, 0.8} \definecolor{bleudefrance}{rgb}{0.19, 0.55, 0.91} \definecolor{babyblue}{rgb}{0.54, 0.81, 0.94} \setbeamercolor{background}{bg=red!15!white} \setbeamercovered{transparent} \setbeamercolor{title in sidebar}{fg=lightgray} \setbeamercolor{sidebar}{bg=lightgray,fg=purpleblue} \setbeamercolor{author in sidebar}{fg=lightgray} \setbeamercolor{section in sidebar}{fg=white,bg=mcgillred} \setbeamercolor{title}{fg=white,bg=mcgillred} \setbeamercolor{author}{use=structure,fg=black,bg=darkgray} \setbeamercolor{part name}{fg=black,bg=blue!30!white} \setbeamercolor{institute}{use=structure,fg=black,bg=lightdarkgray} \setbeamercolor{frametitle}{fg=white,bg=mcgillred} \setbeamercolor{subsection in sidebar}{fg=purpleblue} \setbeamercolor{block title}{use=structure,fg=black,bg=blue!30!white} \setbeamercolor{block body}{use=structure,fg=black,bg=blue!20!white} \setbeamertemplate{navigation symbols}{} \setbeamercovered{transparent} \mode \setbeamertemplate{part page} { \begin{centering} %\begin{beamercolorbox}[sep=16pt,center]{author} % \usebeamerfont{part title}\partname~\insertpartnumber \par %\end{beamercolorbox} \vskip1em\par \begin{beamercolorbox}[sep=16pt,center]{institute} \usebeamerfont{part title} \insertpart\par \end{beamercolorbox} \end{centering} } \makeatletter \AtBeginPart{% \beamer@tocsectionnumber=0\relax \setcounter{section}{0} \numberwithin{section}{part} {% start a group to keep the template change local \setbeamertemplate{background canvas}{% \color{white}\rule{\paperwidth}{\paperheight}% }% \setbeamercolor{frametitle}{fg=white,bg=white} \setbeamertemplate{footline}{ \leavevmode% \hbox{\begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}% \usebeamerfont{author in head/foot} \end{beamercolorbox}% \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}% \usebeamerfont{title in head/foot}\hfill\insertpagenumber \end{beamercolorbox}}% \vskip0pt% } %\addtocounter{framenumber}{-1} \begin{frame}[noframenumbering,c]{\partpage}\end{frame}% }% end group } \makeatother \setbeamercolor{author in head/foot}{bg=white, fg=mcgillred} \setbeamercolor{title in head/foot}{bg=white, fg=mcgillred} \setbeamertemplate{footline} {% %\rule{\paperwidth}{0.025cm} \leavevmode% \hbox{\begin{beamercolorbox}[left, wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm]{title in head/foot}% \usebeamerfont{author in head/foot}\insertpartnumber.\insertsectionnumber: \insertsection \end{beamercolorbox}% \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}% \usebeamerfont{title in head/foot}\hfill\insertpagenumber \end{beamercolorbox}}% \vskip0pt% } %\usecolortheme[named=kugreen]{structure} %\useinnertheme{circles} %\setbeamercovered{transparent} %\setbeamertemplate{blocks}[rounded][shadow=true] \setbeamertemplate{blocks}[default] \setbeamerfont{itemize/enumerate subbody}{size=\footnotesize} \setbeamertemplate{itemize items}[circle] \newenvironment<>{Ex}[1]{% \begin{actionenv}#2% \def\insertblocktitle{Example: #1}% \par% \mode{% \setbeamercolor{block title}{use=structure,fg=darkmidnightblue,bg=babyblue} \setbeamercolor{block body}{use=structure,fg=black,bg=bubbles} \setbeamertemplate{itemize item}{\scriptsize\raise1.25pt\hbox{\donotcoloroutermaths$\bullet$}} }% \usebeamertemplate{block begin}\justifying} {\par\usebeamertemplate{block end}\end{actionenv}} \newenvironment<>{Thm}[1]{% \begin{actionenv}#2% \def\insertblocktitle{Theorem: #1}% \par% \mode{% \setbeamercolor{block title}{use=structure,fg=white,bg=darkgreen} \setbeamercolor{block body}{use=structure,fg=black,bg=lightgreen} \setbeamercolor{itemize item}{fg=orange!20!black} \setbeamertemplate{itemize item}[circle] }% \usebeamertemplate{block begin}\justifying} {\par\usebeamertemplate{block end}\end{actionenv}} \newenvironment<>{Nt}[1]{% \begin{actionenv}#2% \def\insertblocktitle{Note}% \par% \mode{% \setbeamercolor{block title}{use=structure,fg=white,bg=darkblue} \setbeamercolor{block body}{use=structure,fg=black,bg=azurewm} \setbeamercolor{itemize item}{fg=orange!20!black} \setbeamertemplate{itemize item}[circle] }% \usebeamertemplate{block begin}\justifying} {\par\usebeamertemplate{block end}\end{actionenv}} \newcounter{myNote} \newcommand{\myNote}{\arabic{myNote}} \setcounter{myNote}{1} \def\R{\mathbb{R}} \renewcommand*{\thefootnote}{[\arabic{footnote}]} \renewenvironment<>{Note}{% \begin{actionenv}% \def\insertblocktitle{Note \myNote.}% \par% \mode{% \setbeamercolor{block title}{use=structure,fg=white,bg=darkblue} \setbeamercolor{block body}{use=structure,fg=black,bg=lightblue} \setbeamercolor{itemize item}{fg=orange!20!black} \setbeamertemplate{itemize item}[circle] }% \usebeamertemplate{block begin}\addtocounter{myNote}{1}\justifying} {\par\usebeamertemplate{block end}\end{actionenv}} \date{ } % % The following info should normally be given in you main file: % \usepackage{tikz} \newcommand{\topline}{% \tikz[remember picture,overlay] {% \draw[midgray] ([yshift=-1cm]current page.north west) -- ([yshift=-1cm,xshift=\paperwidth]current page.north west);}} \setbeamertemplate{frametitle}{% \nointerlineskip \begin{beamercolorbox}[sep=0.3cm,ht=1.8em,wd=\paperwidth,rightskip=0cm]{frametitle}% \usebeamerfont{frametitle}\usebeamercolor[fg]{frametitle} \vbox{}\vskip-2ex% \strut\insertframetitle\strut \vskip-1.2ex% \end{beamercolorbox}% } \newcommand\mypart[1]{#1} %\setbeamertemplate{frametitle continuation}[from %second][(\lowercase{\insertcontinuationcountroman})] \setbeamertemplate{frametitle continuation}[from second][] \setbeamertemplate{section in toc}{\insertpartnumber.\inserttocsectionnumber\hspace*{1em}\inserttocsection} \title{Propensity Score Methods, Models and Adjustment} \author{Dr David A. Stephens} \institute{ Department of Mathematics \& Statistics\\ McGill University\\ Montreal, QC, Canada. \vspace{0.1 in} \texttt{david.stephens@mcgill.ca}\\ \texttt{\texttt{www.math.mcgill.ca/dstephens/SISCER2020/}} } \titlegraphic{\includegraphics[width=1.93cm,height=2cm]{McGillLogo}} \setbeamertemplate{itemize item}{\scriptsize\raise1.25pt\hbox{\donotcoloroutermaths$\bullet$}} \setbeamertemplate{itemize subitem}{\tiny\raise1.25pt\hbox{\donotcoloroutermaths$\blacktriangleright$}} \usepackage{etoolbox} %\usepackage{setspace} %\addtocontents{toc}{\protect\setstretch{0.9}} \usepackage{ragged2e} \apptocmd{\frame}{}{\justifying}{} % Allow optional arguments after frame. %\usepackage[bookmarks=true]{hyperref} \lstset{basicstyle=\ttfamily\tiny, numbers=left, numberstyle=\tiny, stepnumber=1, numbersep=5pt} \begin{document} %\part{NHANES Example} \setbeamercolor{author in head/foot}{bg=white, fg=mcgillred} \setbeamercolor{title in head/foot}{bg=white, fg=mcgillred} { \setbeamertemplate{footline} {% \leavevmode% \hbox{\begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}% \usebeamerfont{author in head/foot} \end{beamercolorbox}% \begin{beamercolorbox}[wd=.5\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}% \usebeamerfont{title in head/foot}\hfill\insertpagenumber \end{beamercolorbox}}% \vskip0pt% } <>= library(knitr) # global chunk options opts_chunk$set(cache=TRUE, autodep=TRUE, size = "scriptsize") options(scipen=5) options(repos=c(CRAN="https://cloud.r-project.org/")) @ %\frame{\titlepage} \begin{frame}[fragile,allowframebreaks]\frametitle{The Dirichlet Process} The \emph{Dirichlet Process} is a probability distribution on the space of discrete distributions on $\R$ characterized by \begin{itemize} \item a \emph{base distribution}, $G_X$, which is some probability measure on $\R$, from which are drawn random variables $X_1,X_2,\ldots$. \item a countable collection of \emph{random probabilities} $\pi_1,\pi_2,\ldots$ with $\pi_i > 0$ such that \[ \sum\limits_{i=1}^\infty \pi_i = 1 \] with $\pi_1 = V_1 \sim Beta(1,\alpha)$ and for $i=2,3,\ldots$, \[ \pi_i = V_i \prod_{j=1}^{i-1} (1-V_j) \] where for each $j$, $V_j \sim Beta(1,\alpha)$. \end{itemize} \framebreak <>= set.seed(3) alpha<-2.5;M<-5000 nreps<-2000 pi.mat<-matrix(0,nrow=nreps,ncol=M) for(i in 1:nreps){ V<-rbeta(M,1,alpha) pi.mat[i,]<-c(V[1],cumprod(1-V[-M])*V[-1]) } round(pi.mat[1,1:6],5) round(pi.mat[2,1:6],5) round(pi.mat[3,1:6],5) @ \framebreak $\pi$ sequence is eventually zero to machine accuracy <>= pi.trunc<-apply(pi.mat,1,which.min) pi.trunc[1:10] pi.mat[1,c(pi.trunc[1]-1,pi.trunc[1])] @ \framebreak The probability masses are placed at points drawn from $G_X$: suppose $G_X \equiv Normal(0,2^2)$. <>= sig<-2 X.mat<-matrix(rnorm(M*nreps,0,sig),nrow=nreps) par(mar=c(4,4,1,0)) plot(X.mat[1,],rep(0,M),type='n', xlim=range(-6,6),ylim=range(0,1),xlab='X',ylab='Prob') for(i in 1:5){ X.vec<-X.mat[i,] pi.vec<-pi.mat[i,order(X.vec)] X.vec<-sort(X.vec) lines(X.vec,cumsum(pi.vec),type='s') } @ \framebreak For \emph{any partition} \[ B_1,\ldots,B_K,B_{K+1} \] of $\R$, the \emph{amounts of probability} \[ P_1,\ldots,P_{K},P_{K+1} \] assigned to the elements of the partition form a \[ Dirichlet(K+1;\alpha_1,\alpha_2,\ldots,\alpha_K,\alpha_{K+1}) \] random vector, where \[ \alpha_k = \alpha G_X(B_k) \qquad k=1,\ldots,K+1 \] \framebreak The \emph{Dirichlet distribution} is a model for a collection of probabilities {\scriptsize \[ f(\pi_1,\ldots,\pi_{K}) = \frac{\Gamma(\alpha_1+\cdots+\alpha_{K+1})}{\Gamma(\alpha_1) \cdots \Gamma(\alpha_{K+1})} \pi_1^{\alpha_1 - 1} \cdots \pi_{K}^{\alpha_{K}-1} (1-\pi_1-\cdots-\pi_{K})^{\alpha_{K+1} - 1} \]} where \[ 0 < \sum_{k=1}^{K} \pi_k < 1 \] \bigskip Two ways to simulate: \begin{enumerate}[(i)] \item \emph{Direct:} For $k=1,\ldots,K+1$, $W_k \sim Gamma(\alpha_k,1)$ independent, then \[ \pi_k = \frac{W_k}{W_1+\cdots+W_K} \qquad k=1,\ldots,K \] \framebreak \item \emph{Sequential:} \begin{itemize} \item $\pi_1 \sim Beta(\alpha_1,\alpha_2+\cdots+\alpha_{K+1})$ \item for $k=2,3,\ldots,K$ \[ \pi_k = (1-\pi_1-\pi_2 -\cdots - \pi_{k-1}) V_k \] where \[ V_k \sim Beta(\alpha_k,\alpha_{k+1}+\cdots+\alpha_{K+1}) \] \end{itemize} \end{enumerate} \framebreak Form a partition: <>= K<-3 b.v<-c(-Inf,-1,1,2,Inf) G.v<-diff(pnorm(b.v,0,sig)) round(G.v,5) al.k<-alpha*G.v al.k al.k/sum(al.k) @ \framebreak Add up the probabilities in each partition element: <>= par(mar=c(4,3,1,0)) plot(X.mat[1,],pi.mat[1,],pch=19,cex=0.5,xlab='X', xlim=range(-6,6),ylim=range(0,1)) abline(v=b.v,lty=2,col='red') @ \framebreak <>= #Classify each X to a partition element class.mat<-t(apply(X.mat,1,cut,breaks=b.v, include.lowest=TRUE,labels=FALSE)) table(class.mat[1,]) @ \framebreak <>= #Sum the probabilities prob.totals<-matrix(0,nrow=nreps,ncol=K+1) for(i in 1:nreps){ for(k in 1:(K+1)){ prob.totals[i,k]<-sum(pi.mat[i,class.mat[i,]==k]) } } apply(prob.totals,2,mean) #Mean probability over 1000 replicates al.k/sum(al.k) #Expected value @ \framebreak Plot the sampled partition probabilities: <>= pairs(prob.totals,pch=19,cex=0.5, labels=c(expression(B[1]),expression(B[2]), expression(B[3]),expression(B[4]))) @ \framebreak Sampling the Dirichlet distribution for comparison: <>= library(extraDistr) dirprob.mat<-rdirichlet(nreps, al.k) pairs(dirprob.mat,pch=19,cex=0.5, labels=c(expression(B[1]),expression(B[2]), expression(B[3]),expression(B[4]))) @ \framebreak Comparison: <>= all.probs<-cbind(prob.totals,dirprob.mat)[,c(1,5,2,6,3,7,4,8)] par(mar=c(5,4,1,6),xpd=TRUE,xaxt='n') avec<-c(1,2,4,5,7,8,10,11) boxplot(all.probs,pch=19,cex=0.5,col=c('red','blue'),at=avec) legend("topright", c("DP", "Dirichlet"), fill = c("red", "blue"),inset=c(-0.2,0)) nvec<-c(expression(P[1]),expression(P[2]), expression(P[3]),expression(P[4])) text(c(1.5,4.5,7.5,10.5),rep(-0.1,4),nvec) @ \framebreak The \emph{random mass function} $\widetilde f(x)$ for new random variable $Y$ \[ \widetilde f(x) = \sum_{i=1}^\infty \pi_i \delta_{X_i} (x) \] has the random probability specification \[ \Pr[Y = X_i] = \pi_i \qquad i=1,2,\ldots \] We can easily draw a sample of $Y$ values <>= n<-10 id<-sample(1:M,size=n,prob=pi.mat[1,],rep=T) Y<-X.mat[1,id] round(Y,5) @ \framebreak There are \emph{ties} in this sample <>= n<-100 id<-sample(1:M,size=n,prob=pi.mat[1,],rep=T) table(round(X.mat[1,id],5)) round(cbind(X.mat[1,1:8],pi.mat[1,1:8]),5) @ \framebreak The \emph{Polya Urn} approach for simulating $Y$: \begin{enumerate}[1.] \item simulate $Y_1 \sim G_X$; \item for $i=2,3,\ldots,n$ generate $Y_i$ from the \emph{mixture distribution} \[ \frac{\alpha}{\alpha + i-1} G_X(.) + \frac{1}{\alpha+i-1} \sum_{j=1}^{i-1} \delta_{Y_j}(.) \] that is \begin{itemize} \item with probability $\alpha/(\alpha+i-1)$, simulate $Y_i \sim G_X(.)$; \item with probability $1/(\alpha+i-1)$, simulate $Y_i$ \emph{uniformly} on $\{Y_1,\ldots,Y_{i-1}\}$. \end{itemize} \end{enumerate} \framebreak <>= #Polya urn n<-100 Y<-numeric(n) Y[1]<-rnorm(1,0,sig) for(i in 2:n){ u<-runif(1) if(u < alpha/(alpha+i-1)){ Y[i]<-rnorm(1,0,sig) }else{ Y[i]<-sample(Y[1:(i-1)],size=1) } } table(round(Y,5)) @ \framebreak <>= par(mar=c(4,4,2,0)) tval<-expression(paste('Sample from ', widetilde(f),' via Polya Urn')) hist(Y,col='gray',breaks=seq(-5,5,by=0.1), main=tval) box() @ \framebreak \emph{Posterior calculation:} \begin{itemize} \item Data: $y_1,y_2,\ldots,y_n$ iid from $\widetilde{f}$ \item Prior: $\widetilde{f} \sim DP(\alpha,G_X)$; \item Posterior: $\widetilde{f} \sim DP(\alpha^\star,G_X^\star)$: for set $B$, \begin{align*} \alpha^\star & = \alpha + n\\[6pt] G_X^\star(B) & = \frac{\alpha}{\alpha+n} G_X(B) + \frac{1}{\alpha+n} \sum\limits_{j=1}^n \delta_{y_j}(B) \end{align*} \end{itemize} In prior and posterior, the unknown $\widetilde{f}$ is almost surely \emph{discrete}. \framebreak \emph{Posterior sampling:} for the data generating model, we specify that \[ Y \sim Gamma(3,1/2) \] but retain the prior base measure $G_X \equiv Normal(0,2^2)$. <>= n<-100 Y<-rgamma(n,3,0.5) al.star<-alpha+n nsamp<-2000 pi.samp<-X.samp<-matrix(0,nrow=nsamp,ncol=M) for(i in 1:nsamp){ V<-rbeta(M,1,al.star) pi.samp[i,]<-c(V[1],cumprod(1-V[-M])*V[-1]) tmp.samp1<-rnorm(M,0,sig) tmp.samp2<-sample(Y,size=M,rep=T) u<-runif(M) X.samp[i,]<-(u < alpha/al.star)*tmp.samp1+ (u > alpha/al.star)*tmp.samp2 } @ \framebreak <>= par(mar=c(4,4,2,0)) xv<-seq(-4,20,by=0.01) yv<-pgamma(xv,3,0.5) plot(xv,yv,type='n',ylab='Prob',xlab='y') for(i in 1:50){ X.vec<-X.samp[i,] pi.vec<-pi.samp[i,order(X.vec)] X.vec<-sort(X.vec) lines(X.vec,cumsum(pi.vec),type='s') } lines(xv,yv,col='red') @ \framebreak \begin{itemize} \item The true data generating distribution is \emph{continuous}, but the \emph{discrete} distributions sampled in the posterior provide a good approximation; \item The prior and posterior place non-zero probability on mass functions with support on \emph{negative values}. \framebreak \item for large $n$, \[ G_X^\star(B) \bumpeq \frac{1}{n} \sum\limits_{j=1}^n \delta_{y_j}(B) \] \end{itemize} so $G_X$ concentrates on the \emph{empirical cdf}, and the posterior can be sampled by generating probabilities \[ \pi \sim Dirichlet(n;1,\ldots,1) \] and placing them on the \emph{observed data} $\{y_1,\ldots,y_n\}$. \end{frame} \end{document}