\documentclass[notitlepage]{article}
\usepackage{Math}
%\usepackage{../../Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bm}
\usepackage{bbm}
\usepackage{verbatim}
\usepackage{mathtools}
\usepackage{scalerel}
\usepackage{dsfont}
\usepackage{amsfonts,amsmath,amssymb}
\usepackage{tikz}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}
\usepackage{pgfplots}
\pgfplotsset{compat=1.17}
\newcommand*\circled[1]{\tikz[baseline=(char.base)]{
  \node[shape=circle,draw,inner sep=2pt] (char) {#1};}}
\usepackage{xcolor}
\newtheorem{alg}{Algorithm}
\newtheorem{ex}{Example}
\newtheorem{note}{Note}
\newenvironment{proof}{\par\noindent{\normalfont{\bf{Proof }}}}
{\hfill \small$\blacksquare$\\[2mm]}
\setlength{\parindent}{0in}
\def\E{\mathbbmss{E}}
\def\Var{\text{Var}}
\def\Cov{\text{Cov}}
\def\Corr{\text{Corr}}
\def\bW{\mathbf{W}}
\def\bH{\mathbf{H}}
\newcommand{\bs}[1]{\bm{#1}}
\newcommand{\Rho}{\mathrm{P}}
\newcommand{\cif}{\text{ if }}
\newcommand{\Ra}{\Longrightarrow}
\def\bphi{\boldsymbol \phi}
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}
\def\Xb{\mathbf{X}_{\beta}}
\def\Xp{\mathbf{X}_{\psi}}
\def\beps{\boldsymbol{\epsilon}}
\def\betahat{\widehat \beta}
\def\psihat{\widehat \psi}
\begin{document}

<<setup,include=FALSE>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=6)
options(repos=c(CRAN="https://cloud.r-project.org/"))
knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}',
  '\\usepackage{xcolor}', x, fixed = TRUE)})
@

\begin{center}
{\textsclarge{Augmented Inverse Probability Weighting}}
\end{center}

\tikzset{
  -Latex,auto,node distance =1 cm and 1 cm,semithick,
  state/.style ={circle, draw, minimum width = 0.7 cm},
  box/.style ={rectangle, draw, minimum width = 0.7 cm, fill=lightgray},
  point/.style = {circle, draw, inner sep=0.08cm,fill,node contents={}},
  bidirected/.style={Latex-Latex,dashed},
  el/.style = {inner sep=3pt, align=left, sloped}
}

The propensity score for a binary exposure, denoted $e(X)$, with
\[
e(x) = f_{Z|X}^\calO (1|x) = {\Pr}_{Z|X}^\calO[Z=1|X=x],
\]
can be used to construct generalized versions of the inverse probability weighted (IPW) estimators via augmentation with the conditional mean model $\mu(x,z)$, where
\[
\E_{Y|X,Z}^\calO[Y | X=x,Z=z] = \mu(x,z).
\]
The augmented IPW (AIPW) estimators are
\begin{align*}
\widetilde \mu_{\text{AIPW}} (0) & = \dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{(1-Z_i) (Y_i-\mu(X_i,0))}{1-e(X_i)} + \frac{1}{n} \sum_{i=1}^n \mu(X_i,0) \\[6pt]
\widetilde \mu_{\text{AIPW}} (1) & = \dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{Z_i (Y_i-\mu(X_i,1))}{e(X_i)} + \frac{1}{n} \sum_{i=1}^n \mu(X_i,1)
\end{align*}
or, in standardized weight form,
\begin{align*}
\widehat \mu_{\text{AIPW}} (0) & = \sum\limits_{i=1}^n W_{i0} (Y_i-\mu(X_i,0)) + \frac{1}{n} \sum_{i=1}^n \mu(X_i,0) \\[6pt]
\widehat \mu_{\text{AIPW}} (1) & = \sum\limits_{i=1}^n W_{i1} (Y_i-\mu(X_i,1)) + \frac{1}{n} \sum_{i=1}^n \mu(X_i,1)
\end{align*}
with
\[
W_{i0} = \dfrac{\dfrac{(1-Z_i)}{(1-e(X_i))}}{\sum\limits_{j=1}^n \dfrac{(1-Z_j)}{(1-e(X_j))}} \qquad \qquad W_{i1} = \dfrac{\dfrac{Z_i}{e(X_i)}}{\sum\limits_{j=1}^n \dfrac{Z_j}{e(X_j)}}
\]
These estimators generalize the IPW estimators and are more robust to mis-specification: they are consistent provided \textbf{at least one of} the two models $e(x)$ and $\mu(x,z)$ is correctly specified. That is, estimators that instead use the models
\[
g(x) \qquad \text{and} \qquad m(x,z)
\]
for the propensity score and the conditional mean respectively are still consistent provided either $g(x) \equiv e(x)$ or $m(x,z) \equiv \mu(x,z)$; this is the \textit{double robustness} property. If \textbf{both} models are correctly specified, the estimators are the optimal IPW estimators.
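\bigskip

The estimators above translate directly into code. The following is a minimal R sketch of the AIPW estimator of the contrast $\widetilde \mu_{\text{AIPW}}(1) - \widetilde \mu_{\text{AIPW}}(0)$ (or its standardized analogue); the function \texttt{aipw\_ate} and its argument names are introduced here purely for illustration, and the chunk is not evaluated.

<<aipwsketch,eval=FALSE>>=
#AIPW estimate of the treatment effect contrast from outcome Y, binary
#exposure Z, propensity score values e.hat and conditional mean values
#mu0.hat, mu1.hat (true or fitted); names are illustrative only
aipw_ate <- function(Y, Z, e.hat, mu0.hat, mu1.hat, standardize=FALSE){
  if(standardize){
    #standardized weights, summing to one within each exposure arm
    W1 <- (Z/e.hat)/sum(Z/e.hat)
    W0 <- ((1-Z)/(1-e.hat))/sum((1-Z)/(1-e.hat))
    mu1 <- sum(W1*(Y-mu1.hat)) + mean(mu1.hat)
    mu0 <- sum(W0*(Y-mu0.hat)) + mean(mu0.hat)
  }else{
    mu1 <- mean(Z*(Y-mu1.hat)/e.hat) + mean(mu1.hat)
    mu0 <- mean((1-Z)*(Y-mu0.hat)/(1-e.hat)) + mean(mu0.hat)
  }
  return(mu1-mu0)
}
@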
\bigskip

\textbf{Simulation study:} In the following simulation, we have $k=10$ predictors, but only the first three are confounders: all ten predictors enter the propensity score, but only $X_1$, $X_2$, $X_3$ enter the outcome mean model. We compare the results for the following models:
\begin{align*}
\textbf{Model 0 :} & \quad \textrm{Outcome regression under correct specification of $\mu(x,z)$} \\[6pt]
\textbf{Model 1 :} & \quad \textrm{IPW with correct specification of $e(x)$} \\[6pt]
\textbf{Model 2 :} & \quad \textrm{AIPW with correct specification of both $e(x)$ and $\mu(x,z)$}\\[6pt]
\textbf{Model 3 :} & \quad \textrm{AIPW with correct specification of $e(x)$ only}\\[6pt]
\textbf{Model 4 :} & \quad \textrm{AIPW with correct specification of $\mu(x,z)$ only}\\[6pt]
\textbf{Model 5 :} & \quad \textrm{AIPW with both models mis-specified}
\end{align*}
For each weighted analysis we compute both the unstandardized (denoted a) and standardized weight (denoted b) estimators. We perform 2000 replicate analyses with $n=1000$. In the first analysis, we assume that the propensity score is known.

\pagebreak

<<>>=
#Monte Carlo study
library(mvnfast)
set.seed(23987)
n<-1000
k<-10
Mu<-sample(-5:5,size=k,replace=TRUE)/2
Sigma<-0.1*0.8^abs(outer(1:k,1:k,'-'))
al<-c(-4,rep(1,6),rep(-2,4))
expit<-function(x){return(1/(1+exp(-x)))}
logit<-function(x){return(log(x)-log(1-x))}
be<-rep(0,k+2)
be[1:5]<-c(2,1,-2.0,2.2,3.6)   #true treatment effect is be[2] = 1
sig<-2
nreps<-2000
ests<-matrix(0,nrow=nreps,ncol=11)
for(irep in 1:nreps){
  X<-rmvn(n,mu=Mu,Sigma)
  Xal<-cbind(1,X)
  ps.true<-expit(Xal %*% al)
  Z<-rbinom(n,1,ps.true)
  Xm<-cbind(1,Z,X)
  muval<-as.vector(Xm %*% be)
  Y<-rnorm(n,muval,sig)
  #True propensity score and resulting weights
  eX<-ps.true
  w0<-(1-Z)/(1-eX)
  W0<-w0/sum(w0)
  w1<-Z/eX
  W1<-w1/sum(w1)
  #Correctly specified conditional mean model
  mu0<-cbind(1,0,X) %*% be
  mu1<-cbind(1,1,X) %*% be
  #Mis-specified conditional mean model: intercept and treatment only
  m0<-cbind(1,0,0*X) %*% be
  m1<-cbind(1,1,0*X) %*% be
  ests[irep,1]<-mean(mu1)-mean(mu0)
  ests[irep,2]<-mean(w1*Y)-mean(w0*Y)
  ests[irep,3]<-sum(W1*Y)-sum(W0*Y)
  ests[irep,4]<-mean(w1*(Y-mu1))+mean(mu1)-mean(w0*(Y-mu0))-mean(mu0)
  ests[irep,5]<-sum(W1*(Y-mu1))+mean(mu1)-sum(W0*(Y-mu0))-mean(mu0)
  ests[irep,6]<-mean(w1*(Y-m1))+mean(m1)-mean(w0*(Y-m0))-mean(m0)
  ests[irep,7]<-sum(W1*(Y-m1))+mean(m1)-sum(W0*(Y-m0))-mean(m0)
  #Mis-specified PS model using X1 only;
  #obtain the intercept from the data
  gX<-expit(logit(mean(Z))+al[2]*X[,1])
  wg0<-(1-Z)/(1-gX)
  Wg0<-wg0/sum(wg0)
  wg1<-Z/gX
  Wg1<-wg1/sum(wg1)
  ests[irep,8]<-mean(wg1*(Y-mu1))+mean(mu1)-mean(wg0*(Y-mu0))-mean(mu0)
  ests[irep,9]<-sum(Wg1*(Y-mu1))+mean(mu1)-sum(Wg0*(Y-mu0))-mean(mu0)
  ests[irep,10]<-mean(wg1*(Y-m1))+mean(m1)-mean(wg0*(Y-m0))-mean(m0)
  ests[irep,11]<-sum(Wg1*(Y-m1))+mean(m1)-sum(Wg0*(Y-m0))-mean(m0)
}
@

<<>>=
par(mar=c(4,4,3,0))
nvec<-as.character(c(0,rep(1:5,each=2)))
nvec<-paste('M',nvec,sep='')
nvec[c(2,4,6,8,10)]<-paste(nvec[c(2,4,6,8,10)],'a',sep='')
nvec[c(3,5,7,9,11)]<-paste(nvec[c(3,5,7,9,11)],'b',sep='')
boxplot(ests,ylim=range(-3.5,3.5),pch=19,cex=0.5,names=nvec)
title('IPW and AIPW analyses: True models')
abline(h=1,lty=2,col='red')
#Scaled bias: sqrt(n) x (Monte Carlo mean - true value 1)
b<-sqrt(n)*apply(ests-1,2,mean)
#Scaled variance: n x Monte Carlo variance
v<-n*apply(ests,2,var)
#Scaled MSE
mse<-b^2+v
res<-rbind(b,v,mse)
colnames(res)<-nvec
rownames(res)<-c('Bias','Var.','MSE')
round(res,2)
@

The results are as expected in light of the theory: only the estimators with both models mis-specified (M5a, M5b) exhibit systematic bias.
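\bigskip

In the simulation the variance of each estimator is computed across replicates, but in a single-sample analysis a standard error must be estimated. A standard large-sample choice for the AIPW estimator is based on its estimated influence function; the sketch below (again illustrative and not evaluated, using the same hypothetical inputs as \texttt{aipw\_ate}) treats the supplied nuisance values as fixed.

<<aipwse,eval=FALSE>>=
#Influence-function-based standard error for the unstandardized AIPW
#contrast; phi_i = Z_i(Y_i-mu1_i)/e_i - (1-Z_i)(Y_i-mu0_i)/(1-e_i)
#                  + mu1_i - mu0_i - delta
aipw_se <- function(Y, Z, e.hat, mu0.hat, mu1.hat){
  delta <- mean(Z*(Y-mu1.hat)/e.hat + mu1.hat) -
           mean((1-Z)*(Y-mu0.hat)/(1-e.hat) + mu0.hat)
  phi <- Z*(Y-mu1.hat)/e.hat - (1-Z)*(Y-mu0.hat)/(1-e.hat) +
         mu1.hat - mu0.hat - delta
  #ignores the effect of estimating the nuisance models
  sqrt(mean(phi^2)/length(Y))
}
@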
\bigskip

In a second analysis, we now \textbf{estimate} the propensity score model by logistic regression.

\pagebreak

<<>>=
#Monte Carlo study
set.seed(23987)
nreps<-2000
ests2<-matrix(0,nrow=nreps,ncol=11)
for(irep in 1:nreps){
  X<-rmvn(n,mu=Mu,Sigma)
  Xal<-cbind(1,X)
  ps.true<-expit(Xal %*% al)
  Z<-rbinom(n,1,ps.true)
  Xm<-cbind(1,Z,X)
  muval<-as.vector(Xm %*% be)
  Y<-rnorm(n,muval,sig)
  #Estimated propensity score (correctly specified model)
  ps.fitted<-fitted(glm(Z~X,family=binomial))
  eX<-ps.fitted
  w0<-(1-Z)/(1-eX)
  W0<-w0/sum(w0)
  w1<-Z/eX
  W1<-w1/sum(w1)
  #Correctly specified conditional mean model
  mu0<-cbind(1,0,X) %*% be
  mu1<-cbind(1,1,X) %*% be
  #Mis-specified conditional mean model: intercept and treatment only
  m0<-cbind(1,0,0*X) %*% be
  m1<-cbind(1,1,0*X) %*% be
  ests2[irep,1]<-mean(mu1)-mean(mu0)
  ests2[irep,2]<-mean(w1*Y)-mean(w0*Y)
  ests2[irep,3]<-sum(W1*Y)-sum(W0*Y)
  ests2[irep,4]<-mean(w1*(Y-mu1))+mean(mu1)-mean(w0*(Y-mu0))-mean(mu0)
  ests2[irep,5]<-sum(W1*(Y-mu1))+mean(mu1)-sum(W0*(Y-mu0))-mean(mu0)
  ests2[irep,6]<-mean(w1*(Y-m1))+mean(m1)-mean(w0*(Y-m0))-mean(m0)
  ests2[irep,7]<-sum(W1*(Y-m1))+mean(m1)-sum(W0*(Y-m0))-mean(m0)
  #Mis-specified PS model using X1 only, now estimated
  gX<-fitted(glm(Z~X[,1],family=binomial))
  wg0<-(1-Z)/(1-gX)
  Wg0<-wg0/sum(wg0)
  wg1<-Z/gX
  Wg1<-wg1/sum(wg1)
  ests2[irep,8]<-mean(wg1*(Y-mu1))+mean(mu1)-mean(wg0*(Y-mu0))-mean(mu0)
  ests2[irep,9]<-sum(Wg1*(Y-mu1))+mean(mu1)-sum(Wg0*(Y-mu0))-mean(mu0)
  ests2[irep,10]<-mean(wg1*(Y-m1))+mean(m1)-mean(wg0*(Y-m0))-mean(m0)
  ests2[irep,11]<-sum(Wg1*(Y-m1))+mean(m1)-sum(Wg0*(Y-m0))-mean(m0)
}
@

<<>>=
par(mar=c(4,4,3,0))
nvec<-as.character(c(0,rep(1:5,each=2)))
nvec<-paste('M',nvec,sep='')
nvec[c(2,4,6,8,10)]<-paste(nvec[c(2,4,6,8,10)],'a',sep='')
nvec[c(3,5,7,9,11)]<-paste(nvec[c(3,5,7,9,11)],'b',sep='')
boxplot(ests2,ylim=range(-3.5,3.5),pch=19,cex=0.5,names=nvec)
title('IPW and AIPW analyses: estimated PS model')
abline(h=1,lty=2,col='red')
#Scaled bias, variance and MSE, as before
b<-sqrt(n)*apply(ests2-1,2,mean)
v<-n*apply(ests2,2,var)
mse<-b^2+v
res2<-rbind(b,v,mse)
colnames(res2)<-nvec
rownames(res2)<-c('Bias','Var.','MSE')
round(res2,2)
@

The variances of the weighted estimators are decreased when the propensity score is estimated rather than treated as known: estimation of $e(x)$ adjusts for chance imbalances in the covariates in the realized sample.
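\bigskip

The third analysis below estimates \textbf{both} nuisance models from the data. In terms of the hypothetical helpers sketched earlier, a single replicate of that analysis could be written compactly as follows, given data $(Y, Z, X)$ as in the simulations (not evaluated):

<<aipwusage,eval=FALSE>>=
#Fit the two correctly specified nuisance models
e.hat  <- fitted(glm(Z ~ X, family=binomial))   #propensity score
fit.mu <- lm(Y ~ Z + X)                         #outcome regression
#Fitted conditional means under Z=0 and Z=1
mu0.hat <- as.vector(cbind(1, 0, X) %*% coef(fit.mu))
mu1.hat <- as.vector(cbind(1, 1, X) %*% coef(fit.mu))
aipw_ate(Y, Z, e.hat, mu0.hat, mu1.hat, standardize=TRUE)
aipw_se(Y, Z, e.hat, mu0.hat, mu1.hat)
@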
\bigskip

In a third analysis, we now estimate the propensity score \textbf{and} the conditional mean models.

\pagebreak

<<>>=
#Monte Carlo study
set.seed(23987)
nreps<-2000
ests3<-matrix(0,nrow=nreps,ncol=11)
for(irep in 1:nreps){
  X<-rmvn(n,mu=Mu,Sigma)
  Xal<-cbind(1,X)
  ps.true<-expit(Xal %*% al)
  Z<-rbinom(n,1,ps.true)
  Xm<-cbind(1,Z,X)
  muval<-as.vector(Xm %*% be)
  Y<-rnorm(n,muval,sig)
  #Estimated propensity score (correctly specified model)
  ps.fitted<-fitted(glm(Z~X,family=binomial))
  eX<-ps.fitted
  w0<-(1-Z)/(1-eX)
  W0<-w0/sum(w0)
  w1<-Z/eX
  W1<-w1/sum(w1)
  #Estimated conditional mean model (correctly specified)
  fit.mu<-lm(Y~Z+X)
  mu0<-predict(fit.mu,newdata=data.frame(Z=0,X=X))
  mu1<-predict(fit.mu,newdata=data.frame(Z=1,X=X))
  #Estimated mis-specified conditional mean model using X1 only
  X1<-X[,1]
  fit.m<-lm(Y~Z+X1)
  m0<-predict(fit.m,newdata=data.frame(Z=0,X1=X[,1]))
  m1<-predict(fit.m,newdata=data.frame(Z=1,X1=X[,1]))
  ests3[irep,1]<-mean(mu1)-mean(mu0)
  ests3[irep,2]<-mean(w1*Y)-mean(w0*Y)
  ests3[irep,3]<-sum(W1*Y)-sum(W0*Y)
  ests3[irep,4]<-mean(w1*(Y-mu1))+mean(mu1)-mean(w0*(Y-mu0))-mean(mu0)
  ests3[irep,5]<-sum(W1*(Y-mu1))+mean(mu1)-sum(W0*(Y-mu0))-mean(mu0)
  ests3[irep,6]<-mean(w1*(Y-m1))+mean(m1)-mean(w0*(Y-m0))-mean(m0)
  ests3[irep,7]<-sum(W1*(Y-m1))+mean(m1)-sum(W0*(Y-m0))-mean(m0)
  #Mis-specified PS model using X1 only, estimated
  gX<-fitted(glm(Z~X[,1],family=binomial))
  wg0<-(1-Z)/(1-gX)
  Wg0<-wg0/sum(wg0)
  wg1<-Z/gX
  Wg1<-wg1/sum(wg1)
  ests3[irep,8]<-mean(wg1*(Y-mu1))+mean(mu1)-mean(wg0*(Y-mu0))-mean(mu0)
  ests3[irep,9]<-sum(Wg1*(Y-mu1))+mean(mu1)-sum(Wg0*(Y-mu0))-mean(mu0)
  ests3[irep,10]<-mean(wg1*(Y-m1))+mean(m1)-mean(wg0*(Y-m0))-mean(m0)
  ests3[irep,11]<-sum(Wg1*(Y-m1))+mean(m1)-sum(Wg0*(Y-m0))-mean(m0)
}
@

<<>>=
par(mar=c(4,4,3,0))
nvec<-as.character(c(0,rep(1:5,each=2)))
nvec<-paste('M',nvec,sep='')
nvec[c(2,4,6,8,10)]<-paste(nvec[c(2,4,6,8,10)],'a',sep='')
nvec[c(3,5,7,9,11)]<-paste(nvec[c(3,5,7,9,11)],'b',sep='')
boxplot(ests3,ylim=range(-3.5,3.5),pch=19,cex=0.5,names=nvec)
title('IPW and AIPW analyses: Both models estimated')
abline(h=1,lty=2,col='red')
#Scaled bias, variance and MSE, as before
b<-sqrt(n)*apply(ests3-1,2,mean)
v<-n*apply(ests3,2,var)
mse<-b^2+v
res3<-rbind(b,v,mse)
colnames(res3)<-nvec
rownames(res3)<-c('Bias','Var.','MSE')
round(res3,2)
@

Again, by double robustness, the AIPW estimators remain consistent whenever at least one of the two estimated nuisance models is correctly specified (Models 2--4); only Model 5 lacks this protection.

\end{document}