\documentclass[notitlepage]{article}
\usepackage{Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bbm}
\usepackage{verbatim}
\usepackage{amsfonts,amsmath}
\usepackage{amssymb}
\def\E{\Expect}
\newcommand{\independent}{\perp\mkern-9.5mu\perp}

\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords as needed
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}
\parindent0in

\begin{document}

<>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=999)
options(repos=c(CRAN="https://cloud.r-project.org/"))

inline_hook <- function(x) {
  if (is.numeric(x)) {
    # ifelse does a vectorized comparison:
    # if the value is an integer, print it without decimals; otherwise print two places
    res <- ifelse(x == round(x),
                  sprintf("%6.0f", x),
                  sprintf("%.2f", x))
    paste(res, collapse = ", ")
  }
}
knit_hooks$set(inline = inline_hook)
@

\begin{center}
{\textsclarge{Causal Contrasts from Experimental Study Data}}
\end{center}

In a randomized experimental study, inference concerning the causal effect of a binary treatment $Z$ on an outcome $Y$ can be made by direct comparison of sample averages. Suppose that $Z \sim Bernoulli(p)$ for $0 < p < 1$, with treatment assigned independently of the potential outcomes $(Y(0),Y(1))$, so that the average treatment effect $\delta = \E[Y(1) - Y(0)]$ coincides with $\E[Y | Z=1] - \E[Y | Z=0]$. The simulations below compare two moment-based estimators of $\delta$,
\[
\widetilde{\delta} = \frac{1}{n} \sum_{i=1}^{n} \left\{ \frac{Z_i Y_i}{p} - \frac{(1-Z_i) Y_i}{1-p} \right\}
\qquad \text{and} \qquad
\widehat{\delta} = \frac{1}{n} \sum_{i=1}^{n} \left\{ \frac{Z_i Y_i}{\widehat{p}} - \frac{(1-Z_i) Y_i}{1-\widehat{p}} \right\},
\]
where $\widehat{p} = n^{-1} \sum_{i=1}^{n} Z_i$: the first uses the known randomization probability $p$, the second its sample estimate $\widehat{p}$.

\medskip

\textbf{Simulation 1:} In the simulation below, we assume that
\[
X \sim Normal(\mu_X,\sigma_X^2)
\]
with $\mu_X = 1$ and $\sigma_X = 1$, and
\[
Y | X = x, Z = z \sim Normal( \alpha x + \delta z, \sigma^2)
\]
with $\alpha = 0.5$, $\delta = 2$ and $\sigma = 1$, so that
\[
\E[Y(1) - Y(0)] = \delta.
\]
We study how the variances change as $n$ increases. We choose $p=0.2$.

<>=
set.seed(23)
nreps<-10000; nvec<-seq(50,2000,by=50)
p<-0.2; delta<-2; al<-0.5; sig<-1
muX<-1; sigX<-1
mu0<-al*muX; mu1<-mu0+delta
ests.mat<-array(0,c(length(nvec),nreps,2))
for(i in 1:length(nvec)){
  n<-nvec[i]
  for(irep in 1:nreps){
    X<-rnorm(n,muX,sigX)
    Z<-rbinom(n,1,p)
    Y<-rnorm(n,delta*Z+al*X,sig)
    p.hat<-mean(Z)
    ests.mat[i,irep,1]<-sum(Z*Y)/(n*p)-sum((1-Z)*Y)/(n*(1-p))
    ests.mat[i,irep,2]<-sum(Z*Y)/(n*p.hat)-sum((1-Z)*Y)/(n*(1-p.hat))
  }
}
#Variance of delta tilde
true.var.tilde<-(sig^2+al^2*sigX^2+(p*mu0+(1-p)*mu1)^2)/(p*(1-p))
true.var.tilde
#Variance of delta hat
true.var.hat<-(sig^2+al^2*sigX^2)/(p*(1-p))
true.var.hat
#Efficiency
true.var.tilde/true.var.hat
@

<>=
par(mar=c(4,2,3,1),pch=19)
boxplot(t(ests.mat[,,1]),ylim=range(0.5,3),cex=0.5,names=nvec,cex.axis=0.75)
abline(h=delta,lty=2,col='red')
title(expression(paste('Variability of ',widetilde(delta))))
boxplot(t(ests.mat[,,2]),ylim=range(0.5,3),cex=0.5,names=nvec,cex.axis=0.75)
abline(h=delta,lty=2,col='red')
title(expression(paste('Variability of ',widehat(delta))))
@

<>=
v1<-apply(ests.mat[,,1],1,var)   #Variance for tilde delta
nvec*v1
par(mar=c(4,4,3,1))
plot(nvec,nvec*v1,type='l',xlab='n',ylab='Variance')
points(nvec,nvec*v1,pch=19,cex=0.5)
abline(h=true.var.tilde,col='red',lty=2)
title(expression(paste('Variance of ',sqrt(n)*widetilde(delta))))
@

<>=
v2<-apply(ests.mat[,,2],1,var)   #Variance for delta hat
nvec*v2
par(mar=c(4,4,3,1))
plot(nvec,nvec*v2,type='l',xlab='n',ylab='Variance')
points(nvec,nvec*v2,pch=19,cex=0.5)
abline(h=true.var.hat,col='red',lty=2)
title(expression(paste('Variance of ',sqrt(n)*widehat(delta))))
@

The efficiency is the ratio of the variances of the two estimators, $\Var[\widetilde{\delta}\,]/\Var[\widehat{\delta}\,]$.

<>=
eff<-apply(ests.mat[,,1],1,var)/apply(ests.mat[,,2],1,var)
par(mar=c(4,4,2,1))
plot(nvec,eff,type='l',xlab='n',ylab='Efficiency')
points(nvec,eff,pch=19,cex=0.5)
abline(h=true.var.tilde/true.var.hat,col='red',lty=2)
title('Efficiency')
@
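The reference values \texttt{true.var.tilde} and \texttt{true.var.hat} (the dashed red lines in the variance plots) follow from a standard large-sample calculation; a brief sketch is given below, writing $\mu_z = \E[Y | Z=z]$ and $\sigma^2(z) = \Var[Y | Z=z]$ for the marginal mean and variance of $Y$ in arm $z$, so that here $\mu_0 = \alpha \mu_X$, $\mu_1 = \mu_0 + \delta$ and $\delta = \mu_1 - \mu_0$. Using $\E[Y^2 | Z=z] = \sigma^2(z) + \mu_z^2$,
\begin{align*}
n \, \Var[\widetilde{\delta}\,] & = \frac{\E[Y^2 | Z=1]}{p} + \frac{\E[Y^2 | Z=0]}{1-p} - \delta^2
= \frac{(1-p)\,\sigma^2(1) + p\,\sigma^2(0) + \{p \mu_0 + (1-p) \mu_1\}^2}{p(1-p)}, \\[6pt]
n \, \Var[\widehat{\delta}\,] & \approx \frac{\sigma^2(1)}{p} + \frac{\sigma^2(0)}{1-p}
= \frac{(1-p)\,\sigma^2(1) + p\,\sigma^2(0)}{p(1-p)},
\end{align*}
the second expression being the large-$n$ limit. In Simulation 1, $\sigma^2(0) = \sigma^2(1) = \sigma^2 + \alpha^2 \sigma_X^2$, which yields the two constants computed in the code above; the same expressions, with the appropriate $\sigma^2(z)$, give the reference values used in Simulations 2 and 3 below.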
\pagebreak

\textbf{Simulation 2:} In the simulation below, we assume that
\[
X \sim Normal(\mu_X,\sigma_X^2)
\]
with $\mu_X = 1$ and $\sigma_X = 1$, and
\[
Y | X = x, Z = z \sim Normal( \alpha x + \delta z, \sigma^2 (x,z))
\]
with $\sigma^2(x,z) = \sigma^2(1 + z)$, and
\[
\mu(x,z) = \E_{Y|X,Z}[Y|X=x,Z=z] = \alpha x + \delta z
\]
with $\alpha = 0.5$ and $\delta = 2$, and hence
\[
\E[Y(1) - Y(0)] = \E_{Y|X,Z}[Y|X=x,Z=1] - \E_{Y|X,Z}[Y|X=x,Z=0] = \delta .
\]
We also have
\[
\mu(z) = \E_X[\mu(X,z)] = \E_X[\alpha X + \delta z] = \alpha \E_X[X] + \delta z = \alpha \mu_X + \delta z
\]
and that
\begin{align*}
\sigma^2(z) & = \E_X[\sigma^2(X,z)] + \Var_X[\mu(X,z)] \\[6pt]
& = \sigma^2 (1 + z) + \Var_X[\alpha X + \delta z] \\[6pt]
& = \sigma^2 (1 + z) + \alpha^2 \Var_X[X]\\[6pt]
& = \sigma^2 (1 + z) + \alpha^2 \sigma_X^2
\end{align*}

\medskip

We study how the variances change as $n$ increases. We choose $p=0.2$.

<>=
set.seed(23)
nreps<-10000; nvec<-seq(50,2000,by=50)
p<-0.2; delta<-2; al<-0.5; sig<-1
muX<-1; sigX<-1
mu0<-al*muX; mu1<-mu0+delta
sigs<-c(0,0)
sigs[1]<-sig*sqrt(1)       #conditional sd of Y given (X,Z) when z=0
sigs[2]<-sig*sqrt(1+1)     #conditional sd of Y given (X,Z) when z=1
ests.mat<-array(0,c(length(nvec),nreps,2))
for(i in 1:length(nvec)){
  n<-nvec[i]
  for(irep in 1:nreps){
    X<-rnorm(n,muX,sigX)
    Z<-rbinom(n,1,p)
    Y<-rnorm(n,delta*Z+al*X,sigs[Z+1])
    p.hat<-mean(Z)
    ests.mat[i,irep,1]<-sum(Z*Y)/(n*p)-sum((1-Z)*Y)/(n*(1-p))
    ests.mat[i,irep,2]<-sum(Z*Y)/(n*p.hat)-sum((1-Z)*Y)/(n*(1-p.hat))
  }
}
#Variance of delta tilde
true.var.tilde<-(((1-p)*sigs[2]^2+p*sigs[1]^2)+al^2*sigX^2+(p*mu0+(1-p)*mu1)^2)/(p*(1-p))
true.var.tilde
#Variance of delta hat
true.var.hat<-(((1-p)*sigs[2]^2+p*sigs[1]^2)+al^2*sigX^2)/(p*(1-p))
true.var.hat
#Efficiency
true.var.tilde/true.var.hat
@

<>=
par(mar=c(4,2,3,1),pch=19)
boxplot(t(ests.mat[,,1]),ylim=range(0.5,3),cex=0.5,names=nvec,cex.axis=0.75)
abline(h=delta,lty=2,col='red')
title(expression(paste('Variability of ',widetilde(delta))))
boxplot(t(ests.mat[,,2]),ylim=range(0.5,3),cex=0.5,names=nvec,cex.axis=0.75)
abline(h=delta,lty=2,col='red')
title(expression(paste('Variability of ',widehat(delta))))
@

<>=
v1<-apply(ests.mat[,,1],1,var)   #Variance for tilde delta
nvec*v1
par(mar=c(4,4,3,1))
plot(nvec,nvec*v1,type='l',xlab='n',ylab='Variance')
points(nvec,nvec*v1,pch=19,cex=0.5)
abline(h=true.var.tilde,col='red',lty=2)
title(expression(paste('Variance of ',sqrt(n)*widetilde(delta))))
@

<>=
v2<-apply(ests.mat[,,2],1,var)   #Variance for delta hat
nvec*v2
par(mar=c(4,4,3,1))
plot(nvec,nvec*v2,type='l',xlab='n',ylab='Variance')
points(nvec,nvec*v2,pch=19,cex=0.5)
abline(h=true.var.hat,col='red',lty=2)
title(expression(paste('Variance of ',sqrt(n)*widehat(delta))))
@

<>=
eff<-apply(ests.mat[,,1],1,var)/apply(ests.mat[,,2],1,var)
par(mar=c(4,4,2,1))
plot(nvec,eff,type='l',xlab='n',ylab='Efficiency')
points(nvec,eff,pch=19,cex=0.5)
abline(h=true.var.tilde/true.var.hat,col='red',lty=2)
title('Efficiency')
@
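The agreement between the Monte Carlo results and the theoretical limits can also be checked numerically. The short chunk below is a minimal check using the objects already computed for Simulation 2, comparing the empirical variances of $\sqrt{n}\,\widetilde{\delta}$ and $\sqrt{n}\,\widehat{\delta}$ at the largest sample size ($n = 2000$) with \texttt{true.var.tilde} and \texttt{true.var.hat}; the names \texttt{i.max}, \texttt{mc.var.tilde} and \texttt{mc.var.hat} are introduced here only for this comparison.

<>=
#Monte Carlo versus theoretical variance at the largest sample size
i.max<-length(nvec)
mc.var.tilde<-nvec[i.max]*var(ests.mat[i.max,,1])
mc.var.hat<-nvec[i.max]*var(ests.mat[i.max,,2])
c(mc.var.tilde,true.var.tilde)
c(mc.var.hat,true.var.hat)
#Monte Carlo versus theoretical efficiency
c(mc.var.tilde/mc.var.hat,true.var.tilde/true.var.hat)
@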
\pagebreak

\textbf{Simulation 3:} In the simulation below, we assume that
\[
X \sim Normal(\mu_X,\sigma_X^2)
\]
with $\mu_X = 1$ and $\sigma_X = 1$, and
\[
Y | X = x, Z = z \sim Normal( \alpha x + \delta z, \sigma^2 (x,z))
\]
with $\sigma^2(x,z) = \sigma^2(x^2 + z)$, and
\[
\mu(x,z) = \E_{Y|X,Z}[Y|X=x,Z=z] = \alpha x + \delta z
\]
with $\alpha = 0.5$ and $\delta = 2$, and hence
\[
\E[Y(1) - Y(0)] = \E_{Y|X,Z}[Y|X=x,Z=1] - \E_{Y|X,Z}[Y|X=x,Z=0] = \delta .
\]
We also have
\[
\mu(z) = \E_X[\mu(X,z)] = \E_X[\alpha X + \delta z] = \alpha \E_X[X] + \delta z = \alpha \mu_X + \delta z
\]
and that
\begin{align*}
\sigma^2(z) & = \E_X[\sigma^2(X,z)] + \Var_X[\mu(X,z)] \\[6pt]
& = \sigma^2 (\E_X[X^2] + z) + \Var_X[\alpha X + \delta z] \\[6pt]
& = \sigma^2 (\mu_X^2+\sigma_X^2 + z) + \alpha^2 \Var_X[X]\\[6pt]
& = \sigma^2 (\mu_X^2+\sigma_X^2 + z) + \alpha^2 \sigma_X^2
\end{align*}

\medskip

We study how the variances change as $n$ increases. We choose $p=0.2$.

<>=
set.seed(23)
nreps<-10000; nvec<-seq(50,2000,by=50)
p<-0.2; delta<-2; al<-0.5; sig<-1
muX<-1; sigX<-1
mu0<-al*muX; mu1<-mu0+delta
sigs<-c(0,0)
sigs[1]<-sig*sqrt(muX^2+sigX^2)     #sqrt of E_X[sigma^2(X,z)] for z=0
sigs[2]<-sig*sqrt(muX^2+sigX^2+1)   #sqrt of E_X[sigma^2(X,z)] for z=1
ests.mat<-array(0,c(length(nvec),nreps,2))
for(i in 1:length(nvec)){
  n<-nvec[i]
  for(irep in 1:nreps){
    X<-rnorm(n,muX,sigX)
    Z<-rbinom(n,1,p)
    Y<-rnorm(n,delta*Z+al*X,sig*sqrt(X^2+Z))   #conditional sd is sigma*sqrt(x^2+z)
    p.hat<-mean(Z)
    ests.mat[i,irep,1]<-sum(Z*Y)/(n*p)-sum((1-Z)*Y)/(n*(1-p))
    ests.mat[i,irep,2]<-sum(Z*Y)/(n*p.hat)-sum((1-Z)*Y)/(n*(1-p.hat))
  }
}
#Variance of delta tilde
true.var.tilde<-(((1-p)*sigs[2]^2+p*sigs[1]^2)+al^2*sigX^2+(p*mu0+(1-p)*mu1)^2)/(p*(1-p))
true.var.tilde
#Variance of delta hat
true.var.hat<-(((1-p)*sigs[2]^2+p*sigs[1]^2)+al^2*sigX^2)/(p*(1-p))
true.var.hat
#Efficiency
true.var.tilde/true.var.hat
@

<>=
par(mar=c(4,2,3,1),pch=19)
boxplot(t(ests.mat[,,1]),ylim=range(0.5,3),cex=0.5,names=nvec,cex.axis=0.75)
abline(h=delta,lty=2,col='red')
title(expression(paste('Variability of ',widetilde(delta))))
boxplot(t(ests.mat[,,2]),ylim=range(0.5,3),cex=0.5,names=nvec,cex.axis=0.75)
abline(h=delta,lty=2,col='red')
title(expression(paste('Variability of ',widehat(delta))))
@

<>=
v1<-apply(ests.mat[,,1],1,var)   #Variance for tilde delta
nvec*v1
par(mar=c(4,4,3,1))
plot(nvec,nvec*v1,type='l',xlab='n',ylab='Variance')
points(nvec,nvec*v1,pch=19,cex=0.5)
abline(h=true.var.tilde,col='red',lty=2)
title(expression(paste('Variance of ',sqrt(n)*widetilde(delta))))
@

<>=
v2<-apply(ests.mat[,,2],1,var)   #Variance for delta hat
nvec*v2
par(mar=c(4,4,3,1))
plot(nvec,nvec*v2,type='l',xlab='n',ylab='Variance')
points(nvec,nvec*v2,pch=19,cex=0.5)
abline(h=true.var.hat,col='red',lty=2)
title(expression(paste('Variance of ',sqrt(n)*widehat(delta))))
@

<>=
eff<-apply(ests.mat[,,1],1,var)/apply(ests.mat[,,2],1,var)
par(mar=c(4,4,2,1))
plot(nvec,eff,type='l',xlab='n',ylab='Efficiency')
points(nvec,eff,pch=19,cex=0.5)
abline(h=true.var.tilde/true.var.hat,col='red',lty=2)
title('Efficiency')
@

\end{document}