\documentclass[notitlepage]{article}
\usepackage{../../../Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{multirow}
\usepackage{bbm}
\usepackage{hyperref}
\usepackage{amsfonts,amsmath,amssymb}
\setlength{\parindent}{0pt}

\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % add as many keywords as needed
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\begin{document}

<<>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=999)
options(repos=c(CRAN="https://cloud.r-project.org/"))

inline_hook <- function(x) {
  if (is.numeric(x)) {
    # ifelse does a vectorized comparison:
    # if integer-valued, print without decimals; otherwise print three decimal places
    res <- ifelse(x == round(x),
                  sprintf("%.0f", x),
                  sprintf("%.3f", x))
    paste(res, collapse = ", ")
  } else {
    x
  }
}
knit_hooks$set(inline = inline_hook)
@

\begin{center}
\textsclarge{MATH 559 : Bayesian Theory and Methods}

\vspace{0.1 in}

\textsc{Selection with the Normal model: Exploring different priors}
\end{center}

<<>>=
set.seed(2134)
n<-20;nreps<-1000
mu0<-2;sigma0<-1        # true data-generating parameters: Y ~ Normal(mu0, sigma0^2)
eta<-0; lambda<-1       # prior: mu ~ Normal(eta, 1/lambda)

# sum of squared leave-one-out prediction errors: each x_j minus the posterior
# mean of mu computed from the remaining observations
dsq<-function(xv,ev,lv){
  dv<-xv*0
  for(j in 1:length(xv)){
    dv[j]<-xv[j]-(sum(xv[-j])+ev*lv)/(length(xv)-1+lv)
  }
  return(sum(dv^2))
}
# sum of squared deviations about the sample mean
ssq<-function(xv){
  return(sum((xv-mean(xv))^2))
}
# Monte Carlo estimate of the average (over observations) posterior variance of
# the log density log dnorm(x_i - mu), with mu drawn from its posterior
variance.term<-function(xv,ev,lv,N=10000){
  en<-(sum(xv)+ev*lv)/(length(xv)+lv)
  ln<-length(xv)+lv
  mu<-rnorm(N,en,sqrt(1/ln))
  d<-outer(xv,mu,'-')
  return(mean(apply(dnorm(d,log=TRUE),1,var)))
}

Y<-matrix(rnorm(n*nreps,2,1),ncol=n)    # nreps replicate data sets of size n
etavec<-seq(-5,5,by=0.1)                # grid of prior means to explore

lambda.n<-n+lambda; lambda.n1<-lambda.n/(1+lambda.n)
lambda.ni<-n-1+lambda; lambda.ni1<-lambda.ni/(1+lambda.ni)
const<-0.5*log(2*pi)-0.5*log(lambda.n1)

stat.mat<-array(0,c(length(etavec),nreps,4))
print(dim(stat.mat))
for(il in 1:length(etavec)){
  eta<-etavec[il]
  eta.n<-(n*apply(Y,1,mean)+eta*lambda)/lambda.n             # posterior mean of mu
  Tn<-const+0.5*lambda.n1*apply((Y-eta.n)^2,1,sum)/n         # in-sample average -log predictive density
  Gn<-const+0.5*lambda.n1*(1+(eta.n-mu0)^2)                  # its expectation under the true N(mu0,1)
  Cn<-const+0.5*lambda.ni1*apply(Y,1,dsq,ev=eta,lv=lambda)/n # leave-one-out analogue
  Wn<-Tn+apply(Y,1,variance.term,ev=eta,lv=lambda)           # Tn plus posterior-variance penalty
  stat.mat[il,,]<-cbind(Tn,Gn,Cn,Wn)
}
@

<<>>=
#lbl<-c(expression(T[n]),expression(G[n]),expression(C[n]),expression(C[n2]),expression(W[n]))
par(mar=c(4,4,3,0))
boxplot(t(stat.mat[,,1]),ylim=range(1,2.5),main='Tn',pch=19,cex=0.5)
boxplot(t(stat.mat[,,2]),ylim=range(1,2.5),main='Gn',pch=19,cex=0.5)
boxplot(t(stat.mat[,,3]),ylim=range(1,2.5),main='Cn',pch=19,cex=0.5)
boxplot(t(stat.mat[,,4]),ylim=range(1,2.5),main='Wn',pch=19,cex=0.5)
#title('Boxplot of sampled statistic values over 1000 replicates (n=20)')
#abline(h=0.5*(log(2*pi)+1),col='red')
#legend(1,2.5,c(expression(paste('Limit value :',(log(2*pi)+1)/2))),col='red',lty=1)
@

<<>>=
par(mar=c(4,4,3,0))
plot(etavec,apply(stat.mat[,,1],1,mean),type='l',ylab='Statistic value',
     xlab=expression(eta),ylim=range(1.35,1.5))
lines(etavec,apply(stat.mat[,,2],1,mean),col='red')
lines(etavec,apply(stat.mat[,,3],1,mean),col='blue')
lines(etavec,apply(stat.mat[,,4],1,mean),col='green')
legend(0,1.5,c('Tn','Gn','Cn','Wn'),lty=1,col=c('black','red','blue','green'))
title('Average loss over 1000 replicates (n=20)')
abline(v=mu0,lty=2)
abline(h=(log(2*pi)+1)/2,lty=3)
@
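For reference, the statistics computed above can be written out explicitly. Under the working model $X_1,\ldots,X_n \mid \mu \sim \text{Normal}(\mu,1)$ with conjugate prior $\mu \sim \text{Normal}(\eta,1/\lambda)$, the posterior is $\mu \mid x_{1:n} \sim \text{Normal}(\eta_n,1/\lambda_n)$ and the posterior predictive distribution for a new observation is $\text{Normal}(\eta_n,1+1/\lambda_n)$, where
\[
\eta_n = \frac{n \bar{x}_n + \lambda \eta}{n + \lambda}, \qquad \lambda_n = n + \lambda.
\]
Writing $p_n(\cdot)$ for the posterior predictive density and $\phi(\cdot)$ for the standard Normal density, the code computes
\[
T_n = -\frac{1}{n}\sum_{i=1}^{n} \log p_n(x_i), \qquad
G_n = -\mathbb{E}_{X \sim \text{Normal}(\mu_0,1)}\!\left[\log p_n(X)\right],
\]
\[
W_n = T_n + \frac{1}{n}\sum_{i=1}^{n} \text{Var}_{\mu \mid x_{1:n}}\!\left[\log \phi(x_i-\mu)\right],
\]
with the variance term in $W_n$ estimated by Monte Carlo in \texttt{variance.term}; $C_n$ is the leave-one-out (cross-validated) analogue of $T_n$, in which the predictive mean for $x_i$ is computed from the remaining $n-1$ observations via \texttt{dsq}. In the summary plots, the dashed vertical line marks $\eta=\mu_0$ and the dotted horizontal line marks $(\log(2\pi)+1)/2$, the limiting value of these losses as $n \to \infty$.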
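The Monte Carlo penalty in \texttt{variance.term} also has a closed form that can be used as a check: if $\mu \mid x_{1:n} \sim \text{Normal}(\eta_n,1/\lambda_n)$, then
\[
\text{Var}_{\mu \mid x_{1:n}}\!\left[\log \phi(x_i-\mu)\right] = \frac{1}{2\lambda_n^2} + \frac{(x_i-\eta_n)^2}{\lambda_n}.
\]
A minimal sketch of an exact version is given below; the function name \texttt{variance.term.exact} is introduced purely for illustration and is not used in the chunks that follow.

<<>>=
# Exact (closed-form) counterpart of the Monte Carlo function variance.term():
# average over observations of Var[ log dnorm(x_i - mu) ] with mu | x ~ N(eta.n, 1/lambda.n)
variance.term.exact <- function(xv, ev, lv){
  en <- (sum(xv) + ev*lv)/(length(xv) + lv)   # posterior mean of mu
  ln <- length(xv) + lv                       # posterior precision of mu
  return(mean(1/(2*ln^2) + (xv - en)^2/ln))
}
# Example comparison on the first replicate data set:
# variance.term(Y[1,], ev=0, lv=lambda); variance.term.exact(Y[1,], ev=0, lv=lambda)
@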
<<>>=
set.seed(2134)
n<-200;nreps<-1000
mu0<-2;sigma0<-1        # true data-generating parameters: Y ~ Normal(mu0, sigma0^2)
eta<-0; lambda<-1       # prior: mu ~ Normal(eta, 1/lambda)

# sum of squared leave-one-out prediction errors (faster version of dsq above)
dsq<-function(xv,ev,lv){
  dv<-xv*0
  nx<-length(xv)
  tx<-sum(xv)
  for(j in 1:nx){
    dv[j]<-xv[j]-((tx-xv[j])+ev*lv)/(nx-1+lv)
  }
  return(sum(dv^2))
}
# sum of squared deviations about the sample mean
ssq<-function(xv){
  return(sum((xv-mean(xv))^2))
}
# Monte Carlo estimate of the average posterior variance of log dnorm(x_i - mu)
variance.term<-function(xv,ev,lv,N=10000){
  en<-(sum(xv)+ev*lv)/(length(xv)+lv)
  ln<-length(xv)+lv
  mu<-rnorm(N,en,sqrt(1/ln))
  d<-outer(xv,mu,'-')
  return(mean(apply(dnorm(d,log=TRUE),1,var)))
}

Y<-matrix(rnorm(n*nreps,2,1),ncol=n)    # nreps replicate data sets of size n
etavec<-seq(-5,5,by=0.2)                # grid of prior means to explore

lambda.n<-n+lambda; lambda.n1<-lambda.n/(1+lambda.n)
lambda.ni<-n-1+lambda; lambda.ni1<-lambda.ni/(1+lambda.ni)
const<-0.5*log(2*pi)-0.5*log(lambda.n1)

stat.mat<-array(0,c(length(etavec),nreps,4))
print(dim(stat.mat))
for(il in 1:length(etavec)){
  eta<-etavec[il]
  eta.n<-(rowSums(Y)+eta*lambda)/lambda.n                    # posterior mean of mu
  Tn<-const+0.5*lambda.n1*rowMeans((Y-eta.n)^2)              # in-sample average -log predictive density
  Gn<-const+0.5*lambda.n1*(1+(eta.n-mu0)^2)                  # its expectation under the true N(mu0,1)
  Cn<-const+0.5*lambda.ni1*apply(Y,1,dsq,ev=eta,lv=lambda)/n # leave-one-out analogue
  Wn<-Tn+apply(Y,1,variance.term,ev=eta,lv=lambda)           # Tn plus posterior-variance penalty
  stat.mat[il,,]<-cbind(Tn,Gn,Cn,Wn)
}
@

<<>>=
#lbl<-c(expression(T[n]),expression(G[n]),expression(C[n]),expression(C[n2]),expression(W[n]))
par(mar=c(4,4,3,0))
boxplot(t(stat.mat[,,1]),ylim=range(1,2.5),main='Tn',pch=19,cex=0.5)
boxplot(t(stat.mat[,,2]),ylim=range(1,2.5),main='Gn',pch=19,cex=0.5)
boxplot(t(stat.mat[,,3]),ylim=range(1,2.5),main='Cn',pch=19,cex=0.5)
boxplot(t(stat.mat[,,4]),ylim=range(1,2.5),main='Wn',pch=19,cex=0.5)
#title('Boxplot of sampled statistic values over 1000 replicates (n=200)')
#abline(h=0.5*(log(2*pi)+1),col='red')
#legend(1,2.5,c(expression(paste('Limit value :',(log(2*pi)+1)/2))),col='red',lty=1)
@

<<>>=
par(mar=c(4,4,3,0))
plot(etavec,apply(stat.mat[,,1],1,mean),type='l',ylab='Statistic value',
     xlab=expression(eta),ylim=range(1.35,1.5))
lines(etavec,apply(stat.mat[,,2],1,mean),col='red')
lines(etavec,apply(stat.mat[,,3],1,mean),col='blue')
lines(etavec,apply(stat.mat[,,4],1,mean),col='green')
legend(0,1.5,c('Tn','Gn','Cn','Wn'),lty=1,col=c('black','red','blue','green'))
title('Average loss over 1000 replicates (n=200)')
abline(v=mu0,lty=2)
abline(h=(log(2*pi)+1)/2,lty=3)
@

\end{document}