% Causal Contrasts from Outcome Regression
%
% Sweave/knitr handout: LaTeX text interleaved with R simulation chunks that
% compare two outcome-regression estimators of the average treatment effect
% under correctly specified and mis-specified mean models.
%
% NOTE(review): the original chunk options (labels, echo/fig settings) were lost
% in extraction; every chunk below uses a bare <<>>= header. Restore the
% original options if they are known (under Sweave the plotting chunks need
% fig=TRUE for the boxplots to appear).
\documentclass[notitlepage]{article}
\usepackage{Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bbm}
\usepackage{xypic}
\usepackage{amsfonts,amsmath}
\def\E{\Expect}

\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\parindent0in

\begin{document}

\begin{center}
{\textsclarge{Causal Contrasts from Outcome Regression}}
\end{center}

In a randomized experimental study, inference concerning the causal effect of
binary variable $Z$ on $Y$ can be made by direct comparison of sample averages.
% NOTE(review): the text between ``$0 < p < 1$'' and the first R chunk was lost
% in extraction. The bridging passage below is reconstructed from the first
% simulation chunk only; confirm it against the original handout.
Suppose that $Z \sim \text{Bernoulli}(p)$ for $0 < p < 1$, independently of
$X \sim \text{Normal}(1,1)$, and that
\[
\E_{Y|X,Z}^\Exp[Y|X=x,Z=z] = 0.5 x + \delta z ,
\]
so that $\E^\Exp[Y(1) - Y(0)] = \delta$.
Writing $m(x,z)$ for the modelled mean and
$\widehat{p} = \frac{1}{n} \sum_{i=1}^n Z_i$, we compare the estimators
\[
\frac{1}{n} \sum_{i=1}^n \left\{ m(X_i,1) - m(X_i,0) \right\}
\]
and
\[
\frac{1}{n \widehat{p}} \sum_{i=1}^n Z_i \, m(X_i,Z_i)
  - \frac{1}{n (1-\widehat{p})} \sum_{i=1}^n (1-Z_i) \, m(X_i,Z_i) .
\]

% Y is simulated but not used by either estimator; the draw is kept so that the
% random number stream (and hence the reported variances) is unchanged.
<<>>=
set.seed(23)
nreps<-1000;n<-100;p<-0.5;delta<-2
ests.mat<-matrix(0,nrow=nreps,ncol=2)
for(irep in 1:nreps){
    X<-rnorm(n,1,1)
    Z<-rbinom(n,1,p)
    Y<-rnorm(n,delta*Z+0.5*X,1)
    p.hat<-mean(Z)
    ests.mat[irep,1]<-mean(delta+0.5*X)-mean(0.5*X)
    ests.mat[irep,2]<-sum(Z*(delta*Z+0.5*X))/(n*p.hat)-
        sum((1-Z)*(delta*Z+0.5*X))/(n*(1-p.hat))
}
apply(ests.mat,2,var)
@

\pagebreak

Suppose now that
\[
\E_{Y|X,Z}^\Exp[Y|X=x,Z=z] = 0.5 x + \delta z + 2 x z
\]
so that
\[
\E_{Y|X,Z}^\Exp[Y|X=x,Z=1] - \E_{Y|X,Z}^\Exp[Y|X=x,Z=0] = \delta + 2 x ,
\]
and thus
\[
\E^\Exp[Y(1) - Y(0)] = \E^\Exp[\delta + 2 X] = \delta + 2 \E^\Exp[X] = \delta + 2 .
\]

% The dashed red line in the boxplot marks the true average treatment effect,
% delta + 2.
<<>>=
set.seed(23)
nreps<-1000;n<-100;p<-0.5;delta<-2
ests.mat<-matrix(0,nrow=nreps,ncol=2)
for(irep in 1:nreps){
    X<-rnorm(n,1,1)
    Z<-rbinom(n,1,p)
    Y<-rnorm(n,delta*Z+0.5*X + 2*Z*X,1)
    p.hat<-mean(Z)
    ests.mat[irep,1]<-mean(delta+0.5*X+2*X)-mean(0.5*X)
    ests.mat[irep,2]<-sum(Z*(delta*Z+0.5*X+ 2*X*Z))/(n*p.hat)-
        sum((1-Z)*(delta*Z+0.5*X+2*X*Z))/(n*(1-p.hat))
}
apply(ests.mat,2,var)
par(mar=c(4,2,1,1))
boxplot(ests.mat,ylim=range(2.5,5));abline(h=delta+2,lty=2,col='red')
@

\pagebreak

% The modelled mean is written with x - x^2 so that it agrees with the R code
% below (X-X^2); the extracted text had -2x^2, which the simulation does not use.
Suppose we \textbf{mis-specify} the modelled mean using
\[
m(x,z) = x - x^2 + \delta z + 2 x z ,
\]
that is, the terms involving $z$ are correctly specified, but $m(x,z)$ is not
the true conditional mean.
We can still unbiasedly estimate the average treatment effect $\delta + 2$
using the same estimators:

<<>>=
set.seed(23)
nreps<-1000;n<-100;p<-0.5;delta<-2
ests.mat<-matrix(0,nrow=nreps,ncol=2)
for(irep in 1:nreps){
    X<-rnorm(n,1,1)
    Z<-rbinom(n,1,p)
    Y<-rnorm(n,delta*Z+0.5*X + 2*Z*X,1)
    p.hat<-mean(Z)
    ests.mat[irep,1]<-mean(delta+X-X^2+2*X)-mean(X-X^2)
    ests.mat[irep,2]<-sum(Z*(delta*Z+X-X^2+2*Z*X))/(n*p.hat)-
        sum((1-Z)*(delta*Z+X-X^2+2*Z*X))/(n*(1-p.hat))
}
apply(ests.mat,2,var)
par(mar=c(4,2,1,1))
boxplot(ests.mat,ylim=range(2.5,5));abline(h=delta+2,lty=2,col='red')
@

\pagebreak

% As above, x - x^2 matches the R code below (X-X^2).
However, if we \textbf{mis-specify} the modelled mean using
\[
m(x,z) = x - x^2 + \delta z + x z ,
\]
that is, the terms involving $z$ are incorrectly specified, and $m(x,z)$ is not
the true conditional mean, we can no longer unbiasedly estimate the average
treatment effect $\delta + 2$ using the same estimators:

<<>>=
set.seed(23)
nreps<-1000;n<-100;p<-0.5;delta<-2
ests.mat<-matrix(0,nrow=nreps,ncol=2)
for(irep in 1:nreps){
    X<-rnorm(n,1,1)
    Z<-rbinom(n,1,p)
    Y<-rnorm(n,delta*Z+0.5*X + 2*Z*X,1)
    p.hat<-mean(Z)
    ests.mat[irep,1]<-mean(delta+X-X^2+X)-mean(X-X^2)
    ests.mat[irep,2]<-sum(Z*(delta*Z+X-X^2+Z*X))/(n*p.hat)-
        sum((1-Z)*(delta*Z+X-X^2+Z*X))/(n*(1-p.hat))
}
apply(ests.mat,2,var)
par(mar=c(4,2,1,1))
boxplot(ests.mat,ylim=range(2.5,5));abline(h=delta+2,lty=2,col='red')
@

\end{document}