\documentclass[notitlepage]{article}
\usepackage{../../Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{multirow}
\usepackage{bbm}
\usepackage{bbold}
\usepackage{bm}
\usepackage{amsfonts,amsmath,amssymb}
\setlength{\parindent}{0pt}
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}
\begin{document}

<<>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=999)
options(repos=c(CRAN="https://cloud.r-project.org/"))
inline_hook <- function (x) {
  if (is.numeric(x)) {
    # ifelse does a vectorized comparison:
    # if integer-valued, print without decimals; otherwise print six decimal places
    res <- ifelse(x == round(x),
      sprintf("%6i", x),
      sprintf("%.6f", x)
    )
    paste(res, collapse = ", ")
  }
}
knit_hooks$set(inline = inline_hook)
@

\begin{center}
\textsclarge{MATH 598: TOPICS IN STATISTICS}

\vspace{0.1 in}

\textsc{Bayesian nonparametric methods}
\end{center}

The \textit{Dirichlet Process} is a probability distribution on the space of discrete distributions on $\R$ characterized by
\begin{itemize}
\item a base measure, $G_X$, which is some probability measure on $\R$, from which are drawn a countable collection of random variables $X_1,X_2,\ldots$.
\item a countable collection of random variables $\pi_1,\pi_2,\ldots$ with $\pi_i > 0$ such that $\sum\limits_{i=1}^\infty \pi_i = 1$, constructed in the following way: $\pi_1 = V_1 \sim Beta(1,\alpha)$ and
\[
\pi_i = V_i \prod_{j=1}^{i-1} (1-V_j)
\]
where for each $j$, $V_j \sim Beta(1,\alpha)$ for some parameter $\alpha > 0$. Note that by construction $E[\pi_i] = \frac{1}{1+\alpha}\left(\frac{\alpha}{1+\alpha}\right)^{i-1}$, so in expectation $\pi_1 > \pi_2 > \cdots$, which facilitates truncation at some finite value $M$.
<<>>=
M<-41
alpha<-2
pi.mat<-matrix(0,nrow=1000,ncol=M)
for(i in 1:1000){
  V<-rbeta(M,1,alpha)
  pi.mat[i,]<-c(V[1],cumprod(1-V[-M])*V[-1])
}
par(mar=c(2,2,3,0))
boxplot(pi.mat,pch=19,cex=0.5,ylim=range(0,1),cex.axis=0.6)
title(expression(paste('Boxplot of stick-breaking draws of ', pi)),line=1)
@
\end{itemize}
The collections $\{X_1,X_2,\ldots\}$ and $\{\pi_1,\pi_2,\ldots\}$ define a random discrete distribution with mass function $\widetilde f$ where
\[
\widetilde f(x) = \sum_{i=1}^\infty \pi_i \delta_{X_i} (x)
\]
where $\delta_{X_i} (x) = 1 \Longleftrightarrow x = X_i$. We write $\widetilde f \sim DP(\alpha,G_X)$ to denote that $\widetilde f$ is drawn from the Dirichlet Process.
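The geometric decay of the stick-breaking weights noted above can be checked numerically. The short sketch below (a check separate from the construction itself; the objects \texttt{expected.pi} and \texttt{pi.sim} are introduced only for this purpose) compares the empirical mean of each weight across repeated stick-breaking draws with the closed form for $E[\pi_i]$, and evaluates the expected mass left unassigned after truncation at $M$, namely $(\alpha/(1+\alpha))^M$. The chunks that follow then draw $\widetilde f$ using this truncated construction with $G_X \equiv Normal(0,10^2)$, plotting the atoms and weights and then several draws of the random cdf.
<<>>=
#Sketch: geometric decay of the expected stick-breaking weights
M<-41
alpha<-2
expected.pi<-(1/(1+alpha))*(alpha/(1+alpha))^(0:(M-1))
pi.sim<-t(replicate(1000,{V<-rbeta(M,1,alpha); c(V[1],cumprod(1-V[-M])*V[-1])}))
round(rbind(empirical=colMeans(pi.sim)[1:6],theoretical=expected.pi[1:6]),4)
#expected mass not assigned to the first M sticks
(alpha/(1+alpha))^M
@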
<<>>=
M<-1001
alpha<-2
set.seed(1010)
#Stick-breaking
Xi<-rnorm(M,0,10)    #Base measure
V<-rbeta(M,1,alpha)
pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
Xi1<-Xi
pi1<-pi.vec
par(mar=c(4,4,1,0))
plot(Xi1,pi1,type="n",ylim=range(0,1),ylab=expression(pi),xlim=range(-30,30))
points(Xi1,pi1,pch=19,xlim=range(-30,30),cex=0.5)
for(i in 1:M){lines(c(Xi1[i],Xi1[i]),c(0,pi1[i]),lty=2)}
@
<<>>=
par(mar=c(4,4,1,0))
plot(Xi1,pi1,type="n",ylim=range(0,1),ylab=expression(pi),xlim=range(-30,30))
points(Xi[pi1>1e-6],pi.vec[pi1>1e-6],pch=19,xlim=range(-30,30),cex=0.5)
for(i in 1:M){
  if(pi1[i] < 1e-6) next
  lines(c(Xi1[i],Xi1[i]),c(0,pi1[i]),lty=2)
}
nval<-sum(pi1>1e-6)
text(-30,1,substitute(paste(nv,' points greater than ',10^{-6}),list(nv=nval)),adj=0)

Xi<-rnorm(M,0,10)
V<-rbeta(M,1,alpha)
pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
Xi2<-Xi
pi2<-pi.vec

Xi<-rnorm(M,0,10)
V<-rbeta(M,1,alpha)
pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
Xi3<-Xi
pi3<-pi.vec

par(mar=c(4,4,2,0))
plot(Xi1,pi1,type="n",ylim=range(0,1),ylab=expression(pi),xlim=range(-30,30))
title(expression(paste('Three draws from the ',DP(alpha,G[X]))))
points(Xi1[pi1>1e-6],pi1[pi1>1e-6],pch=19,xlim=range(-30,30),cex=0.5)
for(i in 1:M){
  if(pi1[i] < 1e-6) next
  lines(c(Xi1[i],Xi1[i]),c(0,pi1[i]),lty=2)
}
points(Xi2[pi2>1e-6],pi2[pi2>1e-6],pch=19,xlim=range(-30,30),col="red",cex=0.5)
for(i in 1:M){
  if(pi2[i] < 1e-6) next
  lines(c(Xi2[i],Xi2[i]),c(0,pi2[i]),lty=2,col="red")
}
points(Xi3[pi3>1e-6],pi3[pi3>1e-6],pch=19,xlim=range(-30,30),col="green",cex=0.5)
for(i in 1:M){
  if(pi3[i] < 1e-6) next
  lines(c(Xi3[i],Xi3[i]),c(0,pi3[i]),lty=2,col="green")
}
@
<<>>=
Xi<-rnorm(M,0,10)
V<-rbeta(M,1,alpha)
pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
XiLarge<-Xi
piLarge<-pi.vec
xi.sort<-XiLarge[order(XiLarge)]
pi.sort<-pi.vec[order(XiLarge)]
Pi<-cumsum(pi.sort)
par(mar=c(4,4,2,0))
plot(xi.sort,Pi,type="s",ylim=range(0,1),xlab="x",ylab="Cumul. Prob")
title(expression(paste('Five replicate draws of the random cdf ',(alpha==2))))
for(i in 1:4){
  Xi<-rnorm(M,0,10)
  V<-rbeta(M,1,alpha)
  pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
  XiLarge<-Xi
  piLarge<-pi.vec
  xi.sort<-XiLarge[order(XiLarge)]
  pi.sort<-pi.vec[order(XiLarge)]
  Pi<-cumsum(pi.sort)
  lines(xi.sort,Pi,type="s")
}
xv<-seq(-30,30,length=1001)
yv<-pnorm(xv,0,10)
lines(xv,yv,col="red",lwd=2)
@
The parameter $\alpha$ is a precision parameter; as $\alpha$ increases, the random distribution becomes more concentrated on the base measure.
<<>>=
alpha<-100
Xi<-rnorm(M,0,10)
V<-rbeta(M,1,alpha)
pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
XiLarge<-Xi
piLarge<-pi.vec
xi.sort<-XiLarge[order(XiLarge)]
pi.sort<-pi.vec[order(XiLarge)]
Pi<-cumsum(pi.sort)
par(mar=c(4,4,2,0))
plot(xi.sort,Pi,type="s",ylim=range(0,1),xlab="x",ylab="Cumul. Prob")
title(expression(paste('Five replicate draws of the random cdf ',(alpha==100))))
for(i in 1:4){
  Xi<-rnorm(M,0,10)
  V<-rbeta(M,1,alpha)
  pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
  XiLarge<-Xi
  piLarge<-pi.vec
  xi.sort<-XiLarge[order(XiLarge)]
  pi.sort<-pi.vec[order(XiLarge)]
  Pi<-cumsum(pi.sort)
  lines(xi.sort,Pi,type="s")
}
xv<-seq(-30,30,length=1001)
yv<-pnorm(xv,0,10)
lines(xv,yv,col="red",lwd=2)
@
The random distribution $\widetilde f$ can be sampled to produce iid data $Y_1,\ldots,Y_n$ in a straightforward fashion.
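The concentration seen in the two cdf plots can be quantified: a standard property of the Dirichlet Process is that, for any fixed $x$, $\widetilde F(x) \sim Beta(\alpha G_X(x), \alpha(1-G_X(x)))$, so $E[\widetilde F(x)] = G_X(x)$ and $\text{Var}[\widetilde F(x)] = G_X(x)(1-G_X(x))/(\alpha+1)$. The short sketch below (a check added here for illustration, evaluated at three illustrative points) computes the resulting pointwise standard deviation for the two values of $\alpha$ used above; it is largest at the median of $G_X$ and shrinks like $1/\sqrt{\alpha+1}$.
<<>>=
#Sketch: pointwise prior sd of the random cdf under G_X = Normal(0,10^2),
#using Ftilde(x) ~ Beta(alpha*G_X(x), alpha*(1-G_X(x)))
xg<-c(-10,0,10)
Gx<-pnorm(xg,0,10)
round(rbind(x=xg,
            sd.alpha.2=sqrt(Gx*(1-Gx)/(2+1)),
            sd.alpha.100=sqrt(Gx*(1-Gx)/(100+1))),3)
@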
In the simulation below, we simulate $\widetilde f$ with $\alpha = 10$ and $G_X \equiv Normal(0,1)$.
<<>>=
M<-1000
alpha<-10
X<-rnorm(M,0,1)
V<-rbeta(M,1,alpha)
pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
n<-1000
Y<-X[sample(1:M,rep=T,size=n,prob=pi.vec)]
par(mar=c(4,4,2,0))
hist(Y,col='gray',breaks=seq(-3,3,by=0.1),
     main=expression(paste('Sample of data from ', widetilde(f))));box()
@
Note that in the sample of size $n=\Sexpr{n}$, there are only \Sexpr{length(table(Y))} distinct $Y$ values
<<>>=
tabY<-table(Y)
names(tabY)<-as.character(round(as.numeric(names(tabY)),5))
tabY
@
and thus we observe \textit{clustering}: this is due to the fact that $\widetilde f$ is discrete.

\newpage

We can also use a \textit{Polya Urn} scheme to sample these data: this recursive scheme proceeds as follows:
\begin{enumerate}[1.]
\item simulate $Y_1 \sim G_X$;
\item for $i=2,3,\ldots,n$ generate $Y_i$ from the mixture distribution
\[
\frac{\alpha}{\alpha + i-1} G_X(.) + \frac{1}{\alpha+i-1} \sum_{j=1}^{i-1} \delta_{Y_j}(.)
\]
that is
\begin{itemizeWide}
\item with probability $\alpha/(\alpha+i-1)$, simulate $Y_i \sim G_X(.)$;
\item with probability $(i-1)/(\alpha+i-1)$, set $Y_i$ equal to a value drawn uniformly from $\{Y_1,\ldots,Y_{i-1}\}$, so that each previous value is selected with probability $1/(\alpha+i-1)$.
\end{itemizeWide}
\end{enumerate}
<<>>=
n<-1000
Y<-numeric(n)
Y[1]<-rnorm(1,0,1)
alpha<-10
for(i in 2:n){
  u<-runif(1)
  if(u < alpha/(alpha+i-1)){
    Y[i]<-rnorm(1,0,1)
  }else{
    Y[i]<-Y[sample(1:(i-1),size=1)]   #pick one of the previous values uniformly
  }
}
par(mar=c(4,4,2,0))
hist(Y,col='gray',breaks=seq(-3,3,by=0.1),
     main=expression(paste('Sample of data from ', widetilde(f),' via Polya Urn')));box()
@
Here there are \Sexpr{length(table(Y))} distinct $Y$ values.

The Polya Urn method reconstructs a sample of exchangeable observables according to the de Finetti representation
\[
f_{Y_1,\ldots,Y_n} (y_1,\ldots,y_n) = \int \prod_{i=1}^n \widetilde f(y_i) \: \pi_0(d \widetilde f)
\]
by sampling directly from the left hand side using the factorization
\[
f_{Y_1,\ldots,Y_n} (y_1,\ldots,y_n) = f_{Y_1}(y_1) \prod_{i=2}^n f_{Y_i|Y_1,\ldots,Y_{i-1}}(y_i|y_1,\ldots,y_{i-1})
\]
where
\[
f_{Y_i|Y_1,\ldots,Y_{i-1}}(y_i|y_1,\ldots,y_{i-1}) = \frac{\alpha}{\alpha + i-1} g_X(y_i) + \frac{1}{\alpha+i-1} \sum_{j=1}^{i-1} \delta_{Y_j}(y_i)
\]
where $g_X(.)$ is the density corresponding to $G_X(.)$.

\bigskip

In a \textit{Dirichlet Process Mixture} model we add another stage that brings in a continuous distribution. For example, we could treat each $x_i$ as the location of a normal density, and consider generating a $y$ for each:
\begin{eqnarray*}
x_1,x_2,\ldots &\sim& G_X\\[6pt]
\pi_1,\pi_2,\ldots &&\text{generated by stick-breaking}\\[6pt]
y_i \:|\: x_i &\sim & Normal(x_i,\sigma^2) \qquad i=1,2,\ldots
\end{eqnarray*}
Then the random density function generated takes the form
\[
\ftilde(y) = \sum_{i=1}^\infty \pi_i \phi((y - x_i)/\sigma)/\sigma
\]
where $\phi(.)$ is the standard normal density; that is, an \textbf{infinite mixture model}. In the plot below we have $\alpha=2$ and $\sigma = 1$, with $G_X \equiv Normal(0,\lambda^2)$ where $\lambda=10$.
<<>>=
set.seed(1010)
M<-1001
alpha<-2
sigma<-1;lambda<-10
Xi<-rnorm(M,0,lambda)
V<-rbeta(M,1,alpha)
pi.vec<-c(V[1],cumprod(1-V[-M])*V[-1])
xv<-seq(-30,30,by=0.01)
yv<-xv*0
for(i in 1:M){
  yv<-yv+pi.vec[i]*dnorm(xv,Xi[i],sigma)
}
par(mar=c(4,5,2,0))
plot(xv,yv,type='l',xlab='y',ylab=expression(widetilde(f)(y)))
points(Xi,Xi*0,cex=2*pi.vec,pch=19)
@

\newpage

\textbf{Gibbs sampler:} For fully Bayesian inference consider the de Finetti construction
\begin{itemizeWide}
\item a prior model for $f$ that is $DPM(\alpha,G_X,g_Y;\theta)$, where $\theta$ represents the parameters that appear in $G_X$ and $g_Y$;
\item conditional on $f$, data $y_1,y_2,\ldots,y_n \sim f$ independently.
\end{itemizeWide}
We wish to compute the posterior for $f$. We use the hierarchical formulation
\begin{eqnarray*}
y_j | x_j & \stackrel{\text{ind.}}{\sim} & g_Y(y_j|x_j;\theta) \qquad j=1,\ldots,n\\[6pt]
x_1,\ldots,x_n & \sim & DP(\alpha,G_X;\theta)\\[6pt]
\theta & \sim & \pi_0(\theta).
\end{eqnarray*}
The latent variables $x_1,\ldots,x_n$ are also treated as parameters. They can be sampled using an MCMC \textbf{Gibbs sampler} scheme. For $j=1,\ldots,n$, we sample
\[
x_j \: | \: \bm{x}_{(j)},\bm{y} \sim w_0 p(x_j|y_j) + \sum_{l \neq j} w_l \delta_{x_l}
\]
where
\begin{itemize}
\item $\bm{y} = (y_1,\ldots,y_n)^\transpose$;
\item $\bm{x}_{(j)} = (x_1,\ldots,x_{j-1},x_{j+1},\ldots,x_n)^\transpose$;
\item $w_0$ is proportional to $\alpha$ times the \textbf{prior predictive} of $y_j$;
\item $w_l$ is proportional to the \textbf{likelihood} of $y_j$ given $x_l$;
\item $p(x_j|y_j)$ is the \textbf{posterior} for $x_j$ given $y_j$;
\end{itemize}
and the weights are normalized so that $w_0 + \sum_{l \neq j} w_l = 1$.

\bigskip

\textbf{Marginalized sampler:} To be more efficient, we can use the \textbf{clustering} property: suppose that at a given iteration of the MCMC there are $K$ clusters labelled 1 to $K$, where $K \leq n$. Label the $K$ distinct $x$ values $z_1,\ldots,z_K$ and, for each $j$, define the corresponding cluster label $c_j$ where
\[
c_j = k \qquad \Longleftrightarrow \qquad x_j = z_k.
\]
We can update the $c_j$s instead of the $x_j$s, which is more computationally efficient; we are allocating the $x$s to the \textbf{cluster centres} at the $z$ values. For $i=1,\ldots,n$, let
\begin{itemize}
\item $n_1(i),\ldots,n_K(i)$ denote the number of items in clusters $1,\ldots,K$
\vspace{0.05 in}
\item $\bm{y}_1(i),\ldots,\bm{y}_K(i)$ denote the vectors of $y$ values currently allocated to the $K$ clusters
\end{itemize}
\textbf{if the $i$th data point is removed}. For $i=1,\ldots,n$, we sample the cluster labels in a Gibbs sampler with conditional probabilities
\[
\Pr[c_i = k \: | \: \bm{c}_{(i)}, \bm{y} ] \propto \frac{n_k(i)}{\alpha + n-1} p(y_i|\bm{y}_k(i)) \qquad k=1,\ldots,K
\]
and
\[
\Pr[c_i = K+1 \: | \: \bm{c}_{(i)}, \bm{y} ] \propto \frac{\alpha}{\alpha + n-1} p(y_i)
\]
where $\bm{c}_{(i)}$ denotes the cluster labels of all items except item $i$. In these expressions
\begin{itemize}
\item $p(y_i|\bm{y}_k(i))$ is the \textbf{posterior predictive} density for $y_i$, assuming that $y_i$ comes from cluster $k$;
\vspace{0.1 in}
\item $p(y_i)$ is the \textbf{prior predictive} density for $y_i$, assuming that $y_i$ comes from a \textbf{new cluster} not currently represented in the data.
\end{itemize}
In this formulation, we have \textbf{integrated out} the Dirichlet Process analytically. Thus we can simply sample the cluster labels in turn, and then sample the $z_1,\ldots,z_K$ values; this allows us to do density estimation. By the usual calculation
\[
p(y_i|\bm{y}_k(i)) = \int g_Y(y_i \:|\: x) p(x \:|\: \bm{y}_k(i)) \: d x
\]
where
\begin{eqnarray*}
p(x \:|\: \bm{y}_k(i)) & \propto & p(\bm{y}_k(i) \:|\: x ) p(x) =\left\{\prod_{l \neq i \,:\, c_l = k} g_Y(y_l \:|\: x ) \right\} p(x)
\end{eqnarray*}
gives the posterior distribution for the $k$th cluster centre.
Similarly
\[
p(y_i) = \int g_Y(y_i \:|\: x) p(x) \: d x.
\]
In the earlier Normal model, suppose for simplicity that $G_X$ is the $Normal(0,\lambda^2)$ density:
\[
x_i \sim Normal(0,\lambda^2) \qquad \qquad y_i \: | \:x_i \sim Normal(x_i,\sigma^2).
\]
Then, by standard conjugate calculations, the posterior for the cluster centre given $\bm{y}_k(i)$ is Normal with precision $n_k(i)/\sigma^2+1/\lambda^2$ and mean $\{n_k(i) \overline{y}_k(i)/\sigma^2\}/\{n_k(i)/\sigma^2+1/\lambda^2\}$, and the predictive adds $\sigma^2$ to the posterior variance, so that
\[
p(y_i|\bm{y}_k(i)) \equiv Normal\left(\frac{n_k(i) \overline{y}_k(i)/\sigma^2}{n_k(i)/\sigma^2+1/\lambda^2}, \frac{(n_k(i)+1)/\sigma^2+1/\lambda^2}{n_k(i)/\sigma^2+1/\lambda^2} \sigma^2 \right)
\]
and
\[
p(y_i) \equiv Normal\left(0,\sigma^2+\lambda^2 \right).
\]
The marginalized version of the algorithm often mixes more quickly than the Gibbs sampler on the cluster centres.
<<>>=
#Simulate data from the DPM via the Polya urn, then fit with the marginalized sampler
set.seed(1091)
n<-200
alpha<-2
sigma<-1; lambda<-10
X<-numeric(n)
X[1]<-rnorm(1,0,lambda)
for(j in 2:n){
  u<-runif(1)
  if(u < alpha/(alpha+j-1)){
    X[j]<-rnorm(1,0,lambda)
  }else{
    X[j]<-X[sample(1:(j-1),size=1)]   #copy one of the previous centres uniformly
  }
}
Y<-rnorm(n,X,sigma)
par(mar=c(4,4,2,0))
hist(Y,breaks=seq(-20,20,by=0.50),ylim=range(0,20),col='gray',main='Simulated data');box()
True.K<-length(table(X))
#####################################################
#Marginalized sampler on the cluster labels
set.seed(1010)
index<-c(1:n)
Kval<-5     #initial clustering from the data quintiles
Yq<-quantile(Y,prob=c(0:Kval)/Kval)
Cvec<-as.numeric(cut(Y,Yq,include.lowest=T))
Ysum.Vec<-rep(0,Kval)
for(k in 1:Kval){Ysum.Vec[k]<-sum(Y[Cvec==k])}
Nvec<-as.numeric(table(Cvec))
nreps<-10000
Nsamp<-rep(0,nreps)
yv<-seq(-20,20,by=0.01)
fyv.samp<-numeric()
for(irep in 1:nreps){
  for(i in 1:n){
    #remove item i from its current cluster
    sum.Vec<-Ysum.Vec
    sum.Vec[Cvec[i]]<-sum.Vec[Cvec[i]]-Y[i]
    nvec<-Nvec
    nvec[Cvec[i]]<-nvec[Cvec[i]]-1
    if(nvec[Cvec[i]] > 0){
      Mvec<-(sum.Vec/sigma^2)/(nvec/sigma^2+1/lambda^2)
      Svec<-sigma^2*((nvec+1)/sigma^2+1/lambda^2)/(nvec/sigma^2+1/lambda^2)
      pvec<-nvec*dnorm(rep(Y[i],Kval),Mvec,sqrt(Svec))
      pvec<-c(pvec,alpha*dnorm(Y[i],0,sqrt(sigma^2 + lambda^2)))/(n-1+alpha)
      pvec<-pvec/sum(pvec)
      Ksamp<-sample(c(1:(Kval+1)),size=1,prob=pvec)
      Cvec[i]<-Ksamp
      if(Ksamp == Kval+1){
        Kval<-Kval+1
        Ysum.Vec<-c(sum.Vec,Y[i])
        Nvec<-c(nvec,1)
      }else{
        Ysum.Vec<-sum.Vec
        Ysum.Vec[Ksamp]<-sum.Vec[Ksamp]+Y[i]
        Nvec<-nvec
        Nvec[Ksamp]<-nvec[Ksamp]+1
      }
    }else{
      #item i was a singleton: delete its cluster and relabel
      Ktmp<-Cvec[i]
      sum.Vec<-sum.Vec[-Ktmp]
      nvec<-nvec[-Ktmp]
      Kval<-Kval-1
      Cvec[Cvec > Ktmp]<-Cvec[Cvec > Ktmp]-1
      Mvec<-(sum.Vec/sigma^2)/(nvec/sigma^2+1/lambda^2)
      Svec<-sigma^2*((nvec+1)/sigma^2+1/lambda^2)/(nvec/sigma^2+1/lambda^2)
      pvec<-nvec*dnorm(rep(Y[i],Kval),Mvec,sqrt(Svec))
      pvec<-c(pvec,alpha*dnorm(Y[i],0,sqrt(sigma^2 + lambda^2)))/(n-1+alpha)
      pvec<-pvec/sum(pvec)
      Ksamp<-sample(c(1:(Kval+1)),size=1,prob=pvec)
      Cvec[i]<-Ksamp
      if(Ksamp == Kval+1){
        Kval<-Kval+1
        Ysum.Vec<-c(sum.Vec,Y[i])
        Nvec<-c(nvec,1)
      }else{
        Ysum.Vec<-sum.Vec
        Ysum.Vec[Ksamp]<-sum.Vec[Ksamp]+Y[i]
        Nvec<-nvec
        Nvec[Ksamp]<-nvec[Ksamp]+1
      }
    }
  }
  Nsamp[irep]<-Kval
  if(irep %% 500 == 0){
    #draw the cluster centres from their posteriors and record a density estimate
    MuVec<-(Ysum.Vec/sigma^2)/(Nvec/sigma^2+1/lambda^2)
    SigVec<-1/(Nvec/sigma^2+1/lambda^2)
    Xvec<-rnorm(Kval)*sqrt(SigVec)+MuVec
    fyv<-yv*0
    for(k in 1:Kval){
      fyv<-fyv+Nvec[k]*dnorm(yv,Xvec[k],sigma)/n
    }
    fyv.samp<-cbind(fyv.samp,fyv)
  }
}
@
<<>>=
par(mar=c(4,4,2,0))
hist(Y,breaks=seq(-20,20,by=0.5),col='gray',ylim=range(0,20),main='Posterior density estimates');box()
for(i in 1:ncol(fyv.samp)){lines(yv,fyv.samp[,i]*n*0.5,col='red')}
hist(Nsamp,col='gray',breaks=seq(0,50,by=1),xlab='K',main='Posterior sample for K');box()
@
<<>>=
#Galaxy data using Gibbs sampler
Y.galaxy<-MASS::galaxies/10000
n.galaxy<-length(Y.galaxy)
set.seed(1010)
index<-c(1:n.galaxy)
nreps<-10000
alpha<-2.0
Pi<-Y.galaxy
Prior.K.galaxy<-numeric()
#Prior simulation of the number of clusters K
for(irep in 1:nreps){
  for(i in 1:n.galaxy){
    pvec<-rep(1/(alpha+n.galaxy-1),n.galaxy)
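    #full conditional for x_i under the DP prior alone:
    #slot i (a fresh draw from G_X) gets weight alpha/(alpha+n-1),
    #each other slot (copying that value) gets weight 1/(alpha+n-1)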
    pvec[i]<-alpha/(alpha+n.galaxy-1)
    ival<-sample(index,size=1,prob=pvec)
    if(ival == i){
      Pi[i]<-rnorm(1,0,lambda)
    }else{
      Pi[i]<-Pi[ival]
    }
  }
  Kval<-length(table(Pi))
  Prior.K.galaxy<-c(Prior.K.galaxy,Kval)
}
#Posterior sampling: Gibbs sampler on the latent cluster centres x_i
set.seed(1010)
index<-c(1:n.galaxy)
nreps<-10000
sigma<-0.1
lambda<-0.5
Pi<-Y.galaxy
Posterior.K.galaxy<-numeric()
yv<-seq(0,4,by=0.01)
fyv.samp.GAL<-numeric()
for(irep in 1:nreps){
  for(i in 1:n.galaxy){
    pvec<-dnorm(Y.galaxy[i],Pi,rep(sigma,n.galaxy))
    pvec[i]<-alpha*dnorm(Y.galaxy[i],0,sqrt(sigma^2+lambda^2))
    ival<-sample(index,size=1,prob=pvec)
    if(ival == i){
      #new value: sample x_i from its posterior given y_i alone
      muval<-(Y.galaxy[i]/sigma^2)/(1/sigma^2+1/lambda^2)
      sval<-sqrt(1/(1/sigma^2+1/lambda^2))
      Pi[i]<-rnorm(1,muval,sval)
    }else{
      Pi[i]<-Pi[ival]
    }
  }
  Kval<-length(table(Pi))
  Posterior.K.galaxy<-c(Posterior.K.galaxy,Kval)
  if(irep %% 100 == 0){
    fyv<-yv*0
    for(k in 1:n.galaxy){
      fyv<-fyv+dnorm(yv,Pi[k],sigma)/n.galaxy
    }
    fyv.samp.GAL<-cbind(fyv.samp.GAL,fyv)
  }
}
@
<<>>=
table(Posterior.K.galaxy)/length(Posterior.K.galaxy)
table(Prior.K.galaxy)/length(Prior.K.galaxy)
@
<<>>=
par(mar=c(4,4,2,0))
boxplot(c(Prior.K.galaxy,Posterior.K.galaxy)~rep(c(1,2),each=nreps),
        names=c("Prior","Posterior"),xlab='',ylab='K',main="Prior and posterior for K",ylim=range(0,20))
hist(Y.galaxy,breaks=seq(0,4.0,by=0.05),ylim=range(0,15),col='gray',
     main="Galaxy Data",xlab="Velocity");box()
for(i in 1:20){lines(yv,fyv.samp.GAL[,i]*n.galaxy*0.05,lwd=1,col='red')}
@
<<>>=
#Galaxy data using marginalized algorithm
Y.galaxy<-MASS::galaxies/10000
n.galaxy<-length(Y.galaxy)
set.seed(1010)
index<-c(1:n.galaxy)
nreps<-10000
alpha<-2.0
sigma<-0.1
lambda<-0.5
Kval<-5     #initial clustering from the data quintiles
Yq<-quantile(Y.galaxy,prob=c(0:Kval)/Kval)
Cvec<-as.numeric(cut(Y.galaxy,Yq,include.lowest=T))
Ysum.Vec<-rep(0,Kval)
for(k in 1:Kval){Ysum.Vec[k]<-sum(Y.galaxy[Cvec==k])}
Nvec<-as.numeric(table(Cvec))
Posterior.K.marg<-rep(0,nreps)
yv<-seq(0,4,by=0.01)
marg.fyv.samp.GAL<-numeric()
for(irep in 1:nreps){
  for(i in 1:n.galaxy){
    #remove item i from its current cluster
    sum.Vec<-Ysum.Vec
    sum.Vec[Cvec[i]]<-sum.Vec[Cvec[i]]-Y.galaxy[i]
    nvec<-Nvec
    nvec[Cvec[i]]<-nvec[Cvec[i]]-1
    if(nvec[Cvec[i]] > 0){
      Mvec<-(sum.Vec/sigma^2)/(nvec/sigma^2+1/lambda^2)
      Svec<-sigma^2*((nvec+1)/sigma^2+1/lambda^2)/(nvec/sigma^2+1/lambda^2)
      pvec<-nvec*dnorm(rep(Y.galaxy[i],Kval),Mvec,sqrt(Svec))
      pvec<-c(pvec,alpha*dnorm(Y.galaxy[i],0,sqrt(sigma^2 + lambda^2)))/(n.galaxy-1+alpha)
      pvec<-pvec/sum(pvec)
      Ksamp<-sample(c(1:(Kval+1)),size=1,prob=pvec)
      Cvec[i]<-Ksamp
      if(Ksamp == Kval+1){
        Kval<-Kval+1
        Ysum.Vec<-c(sum.Vec,Y.galaxy[i])
        Nvec<-c(nvec,1)
      }else{
        Ysum.Vec<-sum.Vec
        Ysum.Vec[Ksamp]<-sum.Vec[Ksamp]+Y.galaxy[i]
        Nvec<-nvec
        Nvec[Ksamp]<-nvec[Ksamp]+1
      }
    }else{
      #item i was a singleton: delete its cluster and relabel
      Ktmp<-Cvec[i]
      sum.Vec<-sum.Vec[-Ktmp]
      nvec<-nvec[-Ktmp]
      Kval<-Kval-1
      Cvec[Cvec > Ktmp]<-Cvec[Cvec > Ktmp]-1
      Mvec<-(sum.Vec/sigma^2)/(nvec/sigma^2+1/lambda^2)
      Svec<-sigma^2*((nvec+1)/sigma^2+1/lambda^2)/(nvec/sigma^2+1/lambda^2)
      pvec<-nvec*dnorm(rep(Y.galaxy[i],Kval),Mvec,sqrt(Svec))
      pvec<-c(pvec,alpha*dnorm(Y.galaxy[i],0,sqrt(sigma^2 + lambda^2)))/(n.galaxy-1+alpha)
      pvec<-pvec/sum(pvec)
      Ksamp<-sample(c(1:(Kval+1)),size=1,prob=pvec)
      Cvec[i]<-Ksamp
      if(Ksamp == Kval+1){
        Kval<-Kval+1
        Ysum.Vec<-c(sum.Vec,Y.galaxy[i])
        Nvec<-c(nvec,1)
      }else{
        Ysum.Vec<-sum.Vec
        Ysum.Vec[Ksamp]<-sum.Vec[Ksamp]+Y.galaxy[i]
        Nvec<-nvec
        Nvec[Ksamp]<-nvec[Ksamp]+1
      }
    }
  }
  Posterior.K.marg[irep]<-Kval
  if(irep %% 100 == 0){
    MuVec<-(Ysum.Vec/sigma^2)/(Nvec/sigma^2+1/lambda^2)
    SigVec<-1/(Nvec/sigma^2+1/lambda^2)
    Xvec<-rnorm(Kval)*sqrt(SigVec)+MuVec
    fyv<-yv*0
    for(k in 1:Kval){
      fyv<-fyv+Nvec[k]*dnorm(yv,Xvec[k],sigma)/n.galaxy
    }
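    #fyv now holds one posterior draw of the mixture density on the grid yv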
    marg.fyv.samp.GAL<-cbind(marg.fyv.samp.GAL,fyv)
  }
}
@
<<>>=
table(Posterior.K.marg)/nreps
table(Prior.K.galaxy)/length(Prior.K.galaxy)
@
<<>>=
par(mar=c(4,4,2,0))
boxplot(c(Prior.K.galaxy,Posterior.K.marg)~rep(c(1,2),each=nreps),
        names=c("Prior","Posterior"),xlab='',ylab='K',main="Prior and posterior for K",ylim=range(0,20))
hist(Y.galaxy,breaks=seq(0,4.0,by=0.05),ylim=range(0,15),col='gray',
     main="Galaxy Data",xlab="Velocity");box()
for(i in 1:20){lines(yv,marg.fyv.samp.GAL[,i]*n.galaxy*0.05,lwd=1,col='red')}
@
\end{document}