\documentclass[notitlepage]{article}
\usepackage{Math}
%\usepackage{../../Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bm}
\usepackage{bbm}
\usepackage{verbatim}
\usepackage{mathtools}
\usepackage{dsfont}
\usepackage{scalerel}

\usepackage{amsfonts,amsmath,amssymb}

\usepackage{tikz}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}


\usepackage{pgfplots}
\pgfplotsset{compat=1.17}

\newcommand*\circled[1]{\tikz[baseline=(char.base)]{
            \node[shape=circle,draw,inner sep=2pt] (char) {#1};}}

\usepackage{xcolor}

\newtheorem{alg}{Algorithm}
\newtheorem{ex}{Example}
\newtheorem{note}{Note}
\newenvironment{proof}{\par\noindent{\normalfont{\bf{Proof
}}}}
{\hfill \small$\blacksquare$\\[2mm]}


\setlength{\parindent}{0in}

\def\E{\mathbbmss{E}}
\def\Var{\text{Var}}
\def\Cov{\text{Cov}}
\def\Corr{\text{Corr}}

\def\bW{\mathbf{W}}
\def\bH{\mathbf{H}}

\newcommand{\bs}[1]{\bm{#1}}
\newcommand{\Rho}{\mathrm{P}}
\newcommand{\cif}{\text{ if }}
\newcommand{\Ra}{\Longrightarrow}

\def\bphi{\boldsymbol \phi}


\lstloadlanguages{R}

\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}

\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}

\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\def\Xb{\mathbf{X}_{\beta}}
\def\Xp{\mathbf{X}_{\psi}}
\def\beps{\boldsymbol{\epsilon}}
\def\betahat{\widehat \beta}
\def\psihat{\widehat \psi}

\begin{document}

<<setup, cache=FALSE, echo=FALSE>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=6)
options(repos=c(CRAN="https://cloud.r-project.org/"))
knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)})
@
\begin{center}
{\textsclarge{Project 2: Solutions}}
\end{center}

\tikzset{
    -Latex,auto,node distance =1 cm and 1 cm,semithick,
    state/.style ={circle, draw, minimum width = 0.7 cm},
    box/.style ={rectangle, draw, minimum width = 0.7 cm, fill=lightgray},
    point/.style = {circle, draw, inner sep=0.08cm,fill,node contents={}},
    bidirected/.style={Latex-Latex,dashed},
    el/.style = {inner sep=3pt, align=left, sloped}

}

The basic simulation will be based on:
\begin{itemize}

\item $X_1 \sim Normal(0.5,0.4^2)$, $X_2 \sim Normal(-1,1^2)$ independent.

\item Conditional mean model is
\[
\E_{Y|X,Z}^\calO[Y|X_1=x_1,X_2=x_2,Z=z] = 1 + x_1 + x_2 + x_1 x_2 + z (1 + x_1)
\]
with Normal additive errors with variance $2.5^2$.

\item Conditional model for $Z|X_1=x_1,X_2 = x_2$ is $Bernoulli(e(x))$ with
\[
e(x) = \frac{\exp\{-1.5 + 2 x_1 \}}{1+\exp\{-1.5 + 2 x_1 \}}.
\]

\end{itemize}

That is, $X_1$ is a confounder, but $X_2$ is not.  Here the ATE is
\[
\delta = \mu(1)-\mu(0) = 1 + \E[X_1] = 1.5
\]

<<C1,comment='+', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
#Calculation for large N
library(mvnfast)
set.seed(22087)
N<-10000
mu1<-0.5;sig1<-0.4
X1<-rnorm(N,mu1,sig1)
mu2<--1;sig2<-1
X2<-rnorm(N,mu2,sig2)
al<-c(-1.5,2)
expit<-function(x){return(1/(1+exp(-x)))}
Xa<-cbind(1,X1)
eX0<-expit(Xa %*% al)
Z<-rbinom(N,1,eX0)
Xb<-cbind(1,X1,X2,X1*X2,Z,Z*X1)
be<-c(1,1,1,1,1,1)
sigY<-2.5
Y<-Xb %*% be + rnorm(N)*sigY
par(mar=c(4,4,2,0))
hist(eX0,breaks=seq(0,1,by=0.02),main='True propensity score',xlab=expression(e(x)))
box()
@

<<C2,comment='+', fig.width=7.25, fig.height=4.5, fig.align='center'>>=
#Test Correct model
fit0<-lm(Y~X1+X2+X1:X2+Z+Z:X1)
mu0.0<-mean(predict(fit0,newdata=data.frame(X1=X1,X2=X2,Z=0)))
mu1.0<-mean(predict(fit0,newdata=data.frame(X1=X1,X2=X2,Z=1)))
mu1.0-mu0.0
@


<<C3,comment='+', fig.width=7.25, fig.height=4.5, fig.align='center'>>=
#Base Monte Carlo
nreps<-10000
n<-1000
ests0<-matrix(0,nrow=nreps,ncol=2)
for(irep in 1:nreps){
    X1<-rnorm(n,mu1,sig1)
    X2<-rnorm(n,mu2,sig2)
    Xa<-cbind(1,X1)
    eX0<-expit(Xa %*% al)
    Z<-rbinom(n,1,eX0)
    Xb<-cbind(1,X1,X2,X1*X2,Z,Z*X1)
    Y<-Xb %*% be + rnorm(n)*sigY

    w0<-(1-Z)/(1-eX0)
    w1<-Z/eX0
    ests0[irep,1]<-mean(w1*Y)-mean(w0*Y)
    ests0[irep,2]<-sum(w1*Y)/sum(w1)-sum(w0*Y)/sum(w0)
}
apply(ests0,2,mean)
apply(ests0,2,var)
@

\begin{enumerate}[(a)]

\item The distribution of the propensity score influences the \textit{variance} of the ATE estimator.

<<C4,comment='+', fig.width=7.25, fig.height=4.5, fig.align='center'>>=
#Dependence on PS model
nreps<-10000
n<-1000
ests1<-matrix(0,nrow=nreps,ncol=2)
for(irep in 1:nreps){
    X1<-rnorm(n,mu1,1)     #Change sigma1 to 1
    X2<-rnorm(n,mu2,sig2)
    Xa<-cbind(1,X1)
    eX0<-expit(Xa %*% al)
    Z<-rbinom(n,1,eX0)
    Xb<-cbind(1,X1,X2,X1*X2,Z,Z*X1)
    Y<-Xb %*% be + rnorm(n)*sigY

    w0<-(1-Z)/(1-eX0)
    w1<-Z/eX0
    ests1[irep,1]<-mean(w1*Y)-mean(w0*Y)
    ests1[irep,2]<-sum(w1*Y)/sum(w1)-sum(w0*Y)/sum(w0)
}
par(mar=c(4,4,4,0))
hist(eX0,breaks=seq(0,1,by=0.02),main='New propensity score',xlab=expression(e(x)))
box()
apply(ests1,2,mean)
apply(ests1,2,var)
apply(ests1,2,var)/apply(ests0,2,var)  #Ratio of variances compared to base case.
@

\item Estimators based on $\widetilde \mu(\mathsf{z})$ have different \textit{variances} to estimators based on $\widehat \mu(\mathsf{z})$.

<<C5,comment='+', fig.width=7.25, fig.height=4.5, fig.align='center'>>=
#From the previous results
par(mar=c(4,4,2,0))
boxplot(cbind(ests0,ests1),pch=19,cex=0.7,
    names=c(expression(tilde(delta)),expression(hat(delta)),
    expression(tilde(delta)),expression(hat(delta))))
abline(h=1.5,lty=2,col='red')
title('Original PS and New PS')
v0<-apply(ests0,2,var)
v1<-apply(ests1,2,var)
v1[1]/v0[1]
v1[2]/v0[2]
@

\pagebreak

\item Mis-specification of a propensity score model can lead to \emph{biased (and inconsistent)} estimation of the ATE.

<<C6,comment='+', fig.width=7.25, fig.height=4.5, fig.align='center'>>=
#Mis-specification
nreps<-10000
n<-1000
ests2<-matrix(0,nrow=nreps,ncol=2)
for(irep in 1:nreps){
    X1<-rnorm(n,mu1,sig1)     
    X2<-rnorm(n,mu2,sig2)
    Xa<-cbind(1,X1)
    eX0<-expit(Xa %*% al)
    Z<-rbinom(n,1,eX0)
    Xb<-cbind(1,X1,X2,X1*X2,Z,Z*X1)
    Y<-Xb %*% be + rnorm(n)*sigY

    Xax<-cbind(1,X2)                       #Use X2 instead of X1
    eXx<-fitted(glm(Z~X2,family=binomial))
    
    w0<-(1-Z)/(1-eXx)
    w1<-Z/eXx
    ests2[irep,1]<-mean(w1*Y)-mean(w0*Y)
    ests2[irep,2]<-sum(w1*Y)/sum(w1)-sum(w0*Y)/sum(w0)
}
apply(ests2,2,mean)
par(mar=c(4,4,2,0))
boxplot(ests2,names=c(expression(tilde(delta)),expression(hat(delta))),pch=19,cex=0.7)
title('Mis-specified PS model')
abline(h=1.5,lty=2,col='red')
box()
@

\item Estimation and use of a \emph{parametric} propensity score model can improve the variance of an ATE estimator compared with using the \textit{known} propensity score form.
    
<<C7,comment='+', fig.width=7.25, fig.height=4.5, fig.align='center'>>=
#Estimating the PS
nreps<-10000
n<-250
ests3<-matrix(0,nrow=nreps,ncol=4)
for(irep in 1:nreps){
    X1<-rnorm(n,mu1,sig1)     
    X2<-rnorm(n,mu2,sig2)
    Xa<-cbind(1,X1)
    eX0<-expit(Xa %*% al)
    Z<-rbinom(n,1,eX0)
    Xb<-cbind(1,X1,X2,X1*X2,Z,Z*X1)
    Y<-Xb %*% be + rnorm(n)*sigY

    w0<-(1-Z)/(1-eX0)
    w1<-Z/eX0
    ests3[irep,1]<-mean(w1*Y)-mean(w0*Y)
    ests3[irep,2]<-sum(w1*Y)/sum(w1)-sum(w0*Y)/sum(w0)
    
    eX<-fitted(glm(Z~X1,family=binomial))
    
    w0<-(1-Z)/(1-eX)
    w1<-Z/eX
    ests3[irep,3]<-mean(w1*Y)-mean(w0*Y)
    ests3[irep,4]<-sum(w1*Y)/sum(w1)-sum(w0*Y)/sum(w0)

}
apply(ests3,2,var)
par(mar=c(4,4,2,0))
boxplot(ests3,names=c(expression(tilde(delta)),expression(hat(delta)),
    expression(tilde(delta)),expression(hat(delta))),pch=19,cex=0.7)
title('Estimating the PS model')
abline(h=1.5,lty=2,col='red')
box()

@

\item Propensity score models need only be constructed from \emph{confounders} rather than all predictors.

We now change the data generating model

\begin{itemize}

\item $X_1 \sim Normal(0.5,0.4^2)$, $X_2 \sim Normal(-1,1^2)$ independent.

\item Conditional mean model is
\[
\E_{Y|X,Z}^\calO[Y|X_1=x_1,X_2=x_2,Z=z] = 1 + x_1 + z (1 + x_1)
\]
with Normal additive errors with variance $2.5^2$.

\item Conditional model for $Z|X_1=x_1,X_2 = x_2$ is $Bernoulli(e(x))$ with
\[
e(x) = \frac{\exp\{-1.5 + x_1 + x_2 \}}{1+\exp\{-1.5 + x_1 + x_2 \}}.
\]

\end{itemize}

<<C8,comment='+', fig.width=7.25, fig.height=4.5, fig.align='center'>>=
#Estimating the PS
nreps<-10000
n<-1000
al<-c(-1.5,1,1)
ests4<-matrix(0,nrow=nreps,ncol=4)
for(irep in 1:nreps){
    X1<-rnorm(n,mu1,sig1)
    X2<-rnorm(n,mu2,sig2)
    Xa<-cbind(1,X1,X2)
    eX0<-expit(Xa %*% al)
    Z<-rbinom(n,1,eX0)
    Xb<-cbind(1,X1,Z,Z*X1)
    Y<-Xb %*% be[1:4] + rnorm(n)*sigY

    eX1<-fitted(glm(Z~X1+X2,family=binomial))

    w0<-(1-Z)/(1-eX1)
    w1<-Z/eX1
    ests4[irep,1]<-mean(w1*Y)-mean(w0*Y)
    ests4[irep,2]<-sum(w1*Y)/sum(w1)-sum(w0*Y)/sum(w0)

    eX<-fitted(glm(Z~X1,family=binomial))

    w0<-(1-Z)/(1-eX)
    w1<-Z/eX
    ests4[irep,3]<-mean(w1*Y)-mean(w0*Y)
    ests4[irep,4]<-sum(w1*Y)/sum(w1)-sum(w0*Y)/sum(w0)

}
apply(ests4,2,mean)
apply(ests4,2,var)
par(mar=c(4,4,2,0))
boxplot(ests4,names=c(expression(tilde(delta)[12]),expression(hat(delta)[12]),
    expression(tilde(delta)[1]),expression(hat(delta)[1])),pch=19,cex=0.7)
title('Estimating the PS model only using confounder X1')
abline(h=1.5,lty=2,col='red')
box()

@
\end{enumerate}


\end{document}