\documentclass[notitlepage]{article}
\usepackage{Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bbm}
\usepackage{xypic}
\usepackage{amsfonts,amsmath}
\def\E{\Expect}
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
basicstyle=\ttfamily,
breaklines=true,
showstringspaces=false,
keywords={if, else, function, theFunction, tmp}, % Write as many keywords
otherkeywords={},
commentstyle=\itshape\color{Rcommentcolor},
keywordstyle=\color{keywordcolor},
moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}
\parindent0in
\begin{document}
\begin{center}
{\textsclarge{Causal Contrasts from Outcome Regression}}
\end{center}
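% NOTE(review): the text between "$0" and the chunk header was lost in extraction;
% the remainder of this paragraph is reconstructed from the simulation code below.
In a randomized experimental study, inference concerning the causal effect of binary variable $Z$ on $Y$ can be made by direct comparison of sample averages. Suppose that $Z \sim Bernoulli(p)$ for $0 < p < 1$, independently of $X \sim Normal(1,1)$, and that
\[
\E_{Y|X,Z}^\Exp[Y|X=x,Z=z] = 0.5 x + \delta z
\]
so that $\E^\Exp[Y(1) - Y(0)] = \delta$. The following simulation compares the variances of two estimators of this contrast.
<<>>=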
# Monte Carlo comparison of two estimators of the causal contrast when the
# modelled mean, delta*z + 0.5*x, is the same function used to simulate Y.
set.seed(23)
nreps <- 1000
n <- 100
p <- 0.5
delta <- 2
ests.mat <- matrix(0, nrow = nreps, ncol = 2)
for (irep in seq_len(nreps)) {
  X <- rnorm(n, 1, 1)
  Z <- rbinom(n, 1, p)
  mu.obs <- delta * Z + 0.5 * X   # modelled mean at the observed (X, Z)
  # NOTE(review): Y is never used after this line. The draw is kept because
  # removing it would shift the random number stream for later replicates.
  Y <- rnorm(n, mu.obs, 1)
  p.hat <- mean(Z)
  # Estimator 1: average of the modelled mean at z = 1 minus that at z = 0.
  est.reg <- mean(delta + 0.5 * X) - mean(0.5 * X)
  # Estimator 2: group-wise averages of the modelled mean, scaled by p.hat.
  est.grp <- sum(Z * mu.obs) / (n * p.hat) -
    sum((1 - Z) * mu.obs) / (n * (1 - p.hat))
  ests.mat[irep, ] <- c(est.reg, est.grp)
}
# Monte Carlo variance of each estimator (column-wise).
apply(ests.mat, 2, var)
@
\pagebreak
Suppose now that
\[
\E_{Y|X,Z}^\Exp[Y|X=x,Z=z] = 0.5 x + \delta z + 2 x z
\]
so that
\[
\E_{Y|X,Z}^\Exp[Y|X=x,Z=1] - \E_{Y|X,Z}^\Exp[Y|X=x,Z=0] = \delta + 2 x ,
\]
and thus
\[
\E^\Exp[Y(1) - Y(0)] = \E^\Exp[\delta + 2 X] = \delta + 2 \E^\Exp[X] = \delta + 2 .
\]
<<>>=
# Monte Carlo comparison of the two estimators when the mean of Y contains an
# X-by-Z interaction: delta*z + 0.5*x + 2*x*z. The modelled mean is the same.
set.seed(23)
nreps <- 1000
n <- 100
p <- 0.5
delta <- 2
ests.mat <- matrix(0, nrow = nreps, ncol = 2)
for (irep in seq_len(nreps)) {
  X <- rnorm(n, 1, 1)
  Z <- rbinom(n, 1, p)
  mu.obs <- delta * Z + 0.5 * X + 2 * Z * X   # mean at the observed (X, Z)
  # NOTE(review): Y is never used after this line. The draw is kept because
  # removing it would shift the random number stream for later replicates.
  Y <- rnorm(n, mu.obs, 1)
  p.hat <- mean(Z)
  # Estimator 1: average of the modelled mean at z = 1 minus that at z = 0.
  est.reg <- mean(delta + 0.5 * X + 2 * X) - mean(0.5 * X)
  # Estimator 2: group-wise averages of the modelled mean, scaled by p.hat.
  est.grp <- sum(Z * mu.obs) / (n * p.hat) -
    sum((1 - Z) * mu.obs) / (n * (1 - p.hat))
  ests.mat[irep, ] <- c(est.reg, est.grp)
}
# Monte Carlo variance of each estimator (column-wise).
apply(ests.mat, 2, var)
par(mar = c(4, 2, 1, 1))
boxplot(ests.mat, ylim = c(2.5, 5))
# Dashed red reference line at delta + 2.
abline(h = delta + 2, lty = 2, col = 'red')
@
\pagebreak
Suppose we \textbf{mis-specify} the modelled mean using
\[
m(x,z) = x - x^2 + \delta z + 2 x z
\]
that is, the terms involving $z$ are correct, but $m(x,z)$ is not the true conditional mean. We can still unbiasedly estimate the average causal effect $\delta + 2$ using the same estimators:
<<>>=
# Monte Carlo comparison of the two estimators under a mis-specified modelled
# mean, x - x^2 + delta*z + 2*x*z, whose z-terms match the simulating mean
# delta*z + 0.5*x + 2*x*z.
set.seed(23)
nreps <- 1000
n <- 100
p <- 0.5
delta <- 2
ests.mat <- matrix(0, nrow = nreps, ncol = 2)
for (irep in seq_len(nreps)) {
  X <- rnorm(n, 1, 1)
  Z <- rbinom(n, 1, p)
  # NOTE(review): Y is never used after this line. The draw is kept because
  # removing it would shift the random number stream for later replicates.
  Y <- rnorm(n, delta * Z + 0.5 * X + 2 * Z * X, 1)
  p.hat <- mean(Z)
  m.obs <- delta * Z + X - X^2 + 2 * Z * X   # modelled mean at observed (X, Z)
  # Estimator 1: average of the modelled mean at z = 1 minus that at z = 0.
  est.reg <- mean(delta + X - X^2 + 2 * X) - mean(X - X^2)
  # Estimator 2: group-wise averages of the modelled mean, scaled by p.hat.
  est.grp <- sum(Z * m.obs) / (n * p.hat) -
    sum((1 - Z) * m.obs) / (n * (1 - p.hat))
  ests.mat[irep, ] <- c(est.reg, est.grp)
}
# Monte Carlo variance of each estimator (column-wise).
apply(ests.mat, 2, var)
par(mar = c(4, 2, 1, 1))
boxplot(ests.mat, ylim = c(2.5, 5))
# Dashed red reference line at delta + 2.
abline(h = delta + 2, lty = 2, col = 'red')
@
\pagebreak
However, if we \textbf{mis-specify} the modelled mean using
\[
m(x,z) = x - x^2 + \delta z + x z
\]
that is, the terms involving $z$ are also incorrect and $m(x,z)$ is not the true conditional mean, we can no longer unbiasedly estimate the average causal effect $\delta + 2$ using the same estimators:
<<>>=
# Monte Carlo comparison of the two estimators under a mis-specified modelled
# mean, x - x^2 + delta*z + x*z, whose interaction coefficient (1) differs
# from the one in the simulating mean delta*z + 0.5*x + 2*x*z.
set.seed(23)
nreps <- 1000
n <- 100
p <- 0.5
delta <- 2
ests.mat <- matrix(0, nrow = nreps, ncol = 2)
for (irep in seq_len(nreps)) {
  X <- rnorm(n, 1, 1)
  Z <- rbinom(n, 1, p)
  # NOTE(review): Y is never used after this line. The draw is kept because
  # removing it would shift the random number stream for later replicates.
  Y <- rnorm(n, delta * Z + 0.5 * X + 2 * Z * X, 1)
  p.hat <- mean(Z)
  m.obs <- delta * Z + X - X^2 + Z * X   # modelled mean at observed (X, Z)
  # Estimator 1: average of the modelled mean at z = 1 minus that at z = 0.
  est.reg <- mean(delta + X - X^2 + X) - mean(X - X^2)
  # Estimator 2: group-wise averages of the modelled mean, scaled by p.hat.
  est.grp <- sum(Z * m.obs) / (n * p.hat) -
    sum((1 - Z) * m.obs) / (n * (1 - p.hat))
  ests.mat[irep, ] <- c(est.reg, est.grp)
}
# Monte Carlo variance of each estimator (column-wise).
apply(ests.mat, 2, var)
par(mar = c(4, 2, 1, 1))
boxplot(ests.mat, ylim = c(2.5, 5))
# Dashed red reference line at delta + 2.
abline(h = delta + 2, lty = 2, col = 'red')
@
\end{document}