\documentclass[notitlepage]{article}
\usepackage{Math}
%\usepackage{../../Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bm}
\usepackage{bbm}
\usepackage{verbatim}
\usepackage{mathtools}
\usepackage{dsfont}
\usepackage{scalerel}
\usepackage{amsfonts,amsmath,amssymb}
\usepackage{tikz}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}
\usepackage{pgfplots}
\pgfplotsset{compat=1.17}
\newcommand*\circled[1]{\tikz[baseline=(char.base)]{
\node[shape=circle,draw,inner sep=2pt] (char) {#1};}}
\usepackage{xcolor}
\newtheorem{alg}{Algorithm}
\newtheorem{ex}{Example}
\newtheorem{note}{Note}
\newenvironment{proof}{\par\noindent{\normalfont{\bf{Proof }}}}{\hfill \small$\blacksquare$\\[2mm]}
\setlength{\parindent}{0in}
\def\E{\mathbbmss{E}}
\def\Var{\text{Var}}
\def\Cov{\text{Cov}}
\def\Corr{\text{Corr}}
\def\bW{\mathbf{W}}
\def\bH{\mathbf{H}}
\newcommand{\bs}[1]{\bm{#1}}
\newcommand{\Rho}{\mathrm{P}}
\newcommand{\cif}{\text{ if }}
\newcommand{\Ra}{\Longrightarrow}
\def\bphi{\boldsymbol \phi}
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
basicstyle=\ttfamily,
breaklines=true,
showstringspaces=false,
keywords={if, else, function, theFunction, tmp}, % Write as many keywords
otherkeywords={},
commentstyle=\itshape\color{Rcommentcolor},
keywordstyle=\color{keywordcolor},
moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}
\def\Xb{\mathbf{X}_{\beta}}
\def\Xp{\mathbf{X}_{\psi}}
\def\beps{\boldsymbol{\epsilon}}
\def\betahat{\widehat \beta}
\def\psihat{\widehat \psi}
\begin{document}

<<>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=6)
options(repos=c(CRAN="https://cloud.r-project.org/"))
knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)})
@

\begin{center}
{\textsclarge{Variance comparison for IPW and G-estimation}}
\end{center}

\tikzset{
-Latex,auto,node distance =1 cm and 1 cm,semithick,
state/.style ={circle, draw, minimum width = 0.7 cm},
box/.style ={rectangle, draw, minimum width = 0.7 cm, fill=lightgray},
point/.style = {circle, draw, inner sep=0.08cm,fill,node contents={}},
bidirected/.style={Latex-Latex,dashed},
el/.style = {inner sep=3pt, align=left, sloped}
}

We can compute an estimate of the variance-covariance matrix using the theory of estimating equations. For the $p \times 1$ system of estimating equations
\[
\sum_{i=1}^n \bU_i(\theta) = \zerovec
\]
with $\E[\bU(\theta_0)] = \zerovec$, we have that
\[
\sqrt{n}(\widehat \theta_n - \theta_0) \CiD Normal_p(\zerovec,\bV)
\]
where
\[
\bV = \mathcal{J}^{-1} \mathcal{I} \mathcal{J}^{-\top}
\]
with
\[
\mathcal{I} = \E[ \bU(\theta_0) \bU(\theta_0)^\top] \qquad \qquad \mathcal{J} = \E[\dot{\bU}(\theta_0)]
\]
both $(p \times p)$ matrices, and
\[
\dot{\bU}(\theta_0) = -\dfrac{\partial \bU(\theta)}{\partial \theta^\top} \bigg |_{\theta = \theta_0}.
\]
We proceed by computing estimates, $\widehat I_n$ and $\widehat J_n$, of the two matrices based on the observed data, and then computing
\[
\widehat \bV = \widehat J_n^{-1} \widehat I_n \widehat J_n^{-\top}
\]
to estimate the asymptotic variance; this approach is known as \textit{sandwich} or \textit{robust} variance estimation.
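Before turning to the estimators of interest, the following minimal sketch illustrates the sandwich construction in the simplest possible case, the single estimating function $U(\mu) = Y - \mu$ for a population mean: here $\mathcal{J} = 1$ and $\mathcal{I} = \Var(Y)$, so the sandwich variance should essentially reproduce the usual estimate $\widehat{\Var}(Y)/n$. The chunk is a self-contained toy example, and its variable names are illustrative only.

<<>>=
#Toy illustration of sandwich variance estimation: U(mu) = Y - mu
set.seed(1)
y<-rnorm(500,mean=1,sd=2)
mu.hat<-mean(y)                         #solves sum(y - mu) = 0
I.hat<-mean((y-mu.hat)^2)               #estimate of E[U(mu0)^2]
J.hat<-1                                #E[-dU/dmu] = 1
V.hat<-(1/J.hat)*I.hat*(1/J.hat)/length(y)
c(V.hat,var(y)/length(y))               #nearly identical (n vs. n-1 denominator)
@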
In the following analysis, we denote the ATE by $\psi_0$.

\begin{enumerate}[1.]
\item \textbf{IPW version 1:} For the elementary application of IPW, there is a single unknown parameter of interest, and the estimator is
\[
\widehat \psi_0 = \frac{1}{n} \sum_{i=1}^n \left(\frac{Z_i}{e(X_i)} - \frac{1-Z_i}{1-e(X_i)} \right)Y_i.
\]
\begin{enumerate}[(a)]
\item \textbf{\textit{Known propensity score:}} The score is
\[
U(\theta) = \left(\frac{Z}{e(X)} - \frac{1-Z}{1-e(X)} \right) Y - \psi_0
\]
and hence $\dot{U}(\theta) = 1$.
\item \textbf{\textit{Unknown propensity score:}} The score is
\[
\bU(\theta) = \begin{pmatrix} \left(\dfrac{Z}{e(X;\alpha)} - \dfrac{1-Z}{1-e(X; \alpha)} \right)Y - \psi_0 \\[12pt] \bX^\top (Z - e(X; \alpha)) \end{pmatrix} = \begin{pmatrix} \zeta_Y \\[6pt] \bX^\top \epsilon_Z \end{pmatrix},
\]
say. Here $e(X;\alpha) = \text{expit}(\bX \alpha)$, so that, writing $e \equiv e(X;\alpha)$, we have $\partial e/\partial \alpha^\top = e(1-e) \bX$. Therefore
\[
\mathcal{I} = \E \begin{bmatrix} \zeta_Y^2 & \bX \epsilon_Z \zeta_Y \\[6pt] \bX^\top \epsilon_Z \zeta_Y & \bX^\top \bX \epsilon_Z^2 \end{bmatrix}
\qquad \qquad
\mathcal{J} = \E \begin{bmatrix} 1 & \left(\dfrac{Z}{e^2} + \dfrac{1-Z}{(1-e)^2} \right) Y \bX e(1-e) \\[12pt] \zerovec & \bX^\top \bX e (1-e) \end{bmatrix}.
\]
\end{enumerate}
\item \textbf{IPW version 2:} For the second version of the IPW estimator, we consider weighted least squares and the objective function
\[
\sum_{i=1}^n R_i (Y_i - \beta_0 - Z_i \psi_0)^2
\]
where
\[
R_i = \left(\dfrac{1-Z_i}{1-e(X_i)}+ \dfrac{Z_i}{e(X_i)} \right).
\]
This yields the estimator
\[
\widehat \psi_0 = \dfrac{\displaystyle\sum_{i=1}^n \dfrac{Z_i Y_i}{e(X_i)}}{\displaystyle \sum_{i=1}^n \dfrac{Z_i}{e(X_i)}} - \dfrac{\displaystyle \sum_{i=1}^n \dfrac{(1-Z_i) Y_i}{1-e(X_i)}}{\displaystyle\sum_{i=1}^n \dfrac{(1-Z_i)}{1-e(X_i)}}.
\]
\begin{enumerate}[(a)]
\item \textbf{\textit{Known propensity score:}} The score vector is
\[
\bU(\theta) = \begin{pmatrix} R (Y - \beta_0 - Z \psi_0) \\[6pt] R Z (Y - \beta_0 - Z \psi_0) \end{pmatrix} = \begin{pmatrix} R \varepsilon_Y \\[6pt] R Z \varepsilon_Y \end{pmatrix}
\]
say, where $\varepsilon_Y = (Y - \beta_0 - Z \psi_0)$. Therefore
\[
\mathcal{I} = \E \begin{bmatrix} R^2 \varepsilon_Y^2 & R^2 Z \varepsilon_Y^2 \\[6pt] R^2 Z \varepsilon_Y^2 & R^2 Z^2 \varepsilon_Y^2 \end{bmatrix}
\qquad \qquad
\mathcal{J} = \E \begin{bmatrix} R & R Z \\[6pt] R Z & RZ^2 \end{bmatrix}.
\]
\item \textbf{\textit{Unknown propensity score:}} The score vector is
\[
\bU(\theta) = \begin{pmatrix} R (Y - \beta_0 - Z \psi_0) \\[6pt] R Z (Y - \beta_0 - Z \psi_0) \\[6pt] \bX^\top (Z - e(X; \alpha)) \end{pmatrix} = \begin{pmatrix} R \varepsilon_Y \\[6pt] R Z \varepsilon_Y \\[6pt] \bX^\top \epsilon_Z \end{pmatrix}.
\]
Therefore
\[
\mathcal{I} = \E \begin{bmatrix} R^2 \varepsilon_Y^2 & R^2 Z \varepsilon_Y^2 & \bX R \epsilon_Z \varepsilon_Y \\[6pt] R^2 Z \varepsilon_Y^2 & R^2 Z^2 \varepsilon_Y^2 & \bX R Z \epsilon_Z \varepsilon_Y \\[6pt] \bX^\top R \epsilon_Z \varepsilon_Y & \bX^\top R Z \epsilon_Z \varepsilon_Y & \bX^\top \bX \epsilon_Z^2 \end{bmatrix}
\qquad \qquad
\mathcal{J} = \E \begin{bmatrix} R & R Z & -\dot{\bR}^\top \varepsilon_Y \\[6pt] R Z & RZ^2 & -\dot{\bR}^\top Z \varepsilon_Y \\[6pt] \zerovec & \zerovec & \bX^\top \bX e (1-e) \end{bmatrix}
\]
where
\[
\dot{\bR} = \frac{d R}{d \alpha} = \frac{1-Z}{1-e(X)} e(X) \bX^\top - \frac{Z}{e(X)} (1-e(X)) \bX^\top.
\]
\end{enumerate}
\item \textbf{G-estimation:} The most elementary form of the G-estimating equation is based on solving
\[
\sum_{i=1}^n (Z_i - e(X_i)) (Y_i - Z_i \psi_0) = 0,
\]
which yields the estimator
\[
\widehat \psi_0 = \dfrac{\displaystyle\sum_{i=1}^n (Z_i - e(X_i)) Y_i}{\displaystyle\sum_{i=1}^n (Z_i - e(X_i)) Z_i}.
\]
\begin{enumerate}[(a)]
\item \textbf{\textit{Known propensity score:}} The score is
\[
U(\theta) = (Z-e(X))(Y - Z \psi_0)
\]
with $\dot{U}(\theta) = Z (Z-e(X))$.
\item \textbf{\textit{Unknown propensity score:}} The score vector is
\[
\bU(\theta) = \begin{pmatrix} (Z-e(X; \alpha))(Y - Z \psi_0) \\[6pt] \bX^\top (Z - e(X; \alpha)) \end{pmatrix} = \begin{pmatrix} \epsilon_Z \epsilon_Y \\[6pt] \bX^\top \epsilon_Z \end{pmatrix}
\]
say, where $\epsilon_Y = (Y - Z \psi_0)$. Therefore
\[
\mathcal{I} = \E \begin{bmatrix} \epsilon_Z^2 \epsilon_Y^2 & \bX \epsilon_Z^2 \epsilon_Y \\[6pt] \epsilon_Z^2 \epsilon_Y \bX^\top & \bX^\top \bX \epsilon_Z^2 \end{bmatrix}
\qquad \qquad
\mathcal{J} = \E \begin{bmatrix} Z \epsilon_Z & \bX e (1-e) \epsilon_Y\\[6pt] \zerovec & \bX^\top \bX e (1-e) \end{bmatrix}.
\]
\end{enumerate}
\end{enumerate}

<<>>=
#Monte Carlo study of the six ATE estimators
library(mvnfast)
set.seed(22087)
n<-10000
muX<-2;sigX<-1
al<-c(-3.5,2);be<-c(2,3,4,1);sigY<-3      #true ATE is be[2] = 3
expit<-function(x){return(1/(1+exp(-x)))}
nreps<-10000
ate.ests<-matrix(0,nrow=nreps,ncol=6)
colnames(ate.ests)<-c('1(a)','1(b)','2(a)','2(b)','3(a)','3(b)')
for(irep in 1:nreps){
  X1<-rnorm(n,muX,sigX)
  Xm<-cbind(1,X1)
  eX0<-expit(Xm %*% al)                   #true propensity score
  Z<-rbinom(n,1,eX0)
  Xb<-cbind(1,Z,X1,X1^2)
  Y<-Xb %*% be + rnorm(n)*sigY
  eX<-fitted(glm(Z~X1,family=binomial))   #estimated propensity score
  ate.ests[irep,1]<-mean(Z*Y/eX0)-mean((1-Z)*Y/(1-eX0))
  ate.ests[irep,2]<-mean(Z*Y/eX)-mean((1-Z)*Y/(1-eX))
  w<-Z/eX0+(1-Z)/(1-eX0)
  ate.ests[irep,3]<-coef(lm(Y~Z,weights=w))[2]
  w<-Z/eX+(1-Z)/(1-eX)
  ate.ests[irep,4]<-coef(lm(Y~Z,weights=w))[2]
  res2<-Z-eX0
  ate.ests[irep,5]<-sum(res2*Y)/sum(res2*Z)
  res2<-Z-eX
  ate.ests[irep,6]<-sum(res2*Y)/sum(res2*Z)
}
#apply(ate.ests,2,mean)-3
apply(ate.ests,2,var)
@

<<>>=
par(mar=c(4,4,1,0))
boxplot(ate.ests,names=colnames(ate.ests),pch=19,cex=0.5);abline(h=be[2],lty=2,col='red')
@

<<>>=
#Large simulated sample used to evaluate the population-level I and J matrices
N<-100000
X1<-rnorm(N,muX,sigX)
Xm<-cbind(1,X1)
eX0<-expit(Xm %*% al)
Z<-rbinom(N,1,eX0)
Xb<-cbind(1,Z,X1,X1^2)
Y<-Xb %*% be + rnorm(N)*sigY
Xb0<-cbind(1,0,X1,X1^2)
Xb1<-cbind(1,1,X1,X1^2)
mu0<-Xb0 %*% be
mu1<-Xb1 %*% be
eX<-fitted(glm(Z~X1,family=binomial))
@

<<>>=
#IPW 1(a): PS known
#Direct calculation of the theoretical variance
vX0<-mean((sigY^2+mu0^2)/(1-eX0))
vX1<-mean((sigY^2+mu1^2)/eX0)
ipw.var<-(vX0+vX1)-be[2]^2
ipw.var/n
I.n<-J.n<-matrix(0,1,1)
res1<-(Z/eX0-(1-Z)/(1-eX0))*Y-be[2]
I.n[1,1]<-mean(res1^2)
vx<-eX0*(1-eX0)
J.n[1,1]<-1
I.var<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
#This should match the 1(a) variance
I.var[1,1]
var(ate.ests[,1])
########################################################
#IPW 1(b): PS unknown
I.n<-J.n<-matrix(0,3,3)
res1<-(Z/eX-(1-Z)/(1-eX))*Y-be[2]
res2<-(Z-eX)
I.n[1,1]<-mean(res1^2)
I.n[1,2]<-I.n[2,1]<-mean(res1*res2)
I.n[2,2]<-mean(res2^2)
I.n[1,3]<-I.n[3,1]<-mean(X1*res1*res2)
I.n[2,3]<-I.n[3,2]<-mean(X1*res2^2)
I.n[3,3]<-mean(X1^2*res2^2)
I.n
vx<-eX*(1-eX)
J.n[1,1]<-1
J.n[1,2]<-mean(vx*Y*(Z/eX^2+(1-Z)/(1-eX)^2))
J.n[1,3]<-mean(X1*vx*Y*(Z/eX^2+(1-Z)/(1-eX)^2))
J.n[2,2]<-mean(vx)
J.n[2,3]<-J.n[3,2]<-mean(vx*X1)
J.n[3,3]<-mean(vx*X1^2)
J.n
I.var<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
#This should match the 1(b) variance
I.var[1,1]
var(ate.ests[,2])
@
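In practice only a single dataset is available and the true values used in the checks above are unknown, so $\widehat I_n$ and $\widehat J_n$ are evaluated at the point estimates instead. The following sketch carries this out for case 1(b), treating the large simulated sample as the data at hand (hence the division by \texttt{N}); the objects \texttt{psi.hat}, \texttt{I.hat}, \texttt{J.hat} and \texttt{V.hat} are introduced here for illustration only and mirror the 1(b) construction above.

<<>>=
#Sandwich standard error for IPW version 1 with estimated PS, from one dataset
psi.hat<-mean(Z*Y/eX)-mean((1-Z)*Y/(1-eX))     #IPW point estimate of the ATE
res1<-c((Z/eX-(1-Z)/(1-eX))*Y)-psi.hat         #first score component at psi.hat
res2<-Z-eX
vx<-eX*(1-eX)
Xmat<-cbind(1,X1)
U<-cbind(res1,Xmat*res2)                       #N x 3 matrix of score contributions
I.hat<-crossprod(U)/N
J.hat<-matrix(0,3,3)
J.hat[1,1]<-1
J.hat[1,2:3]<-colMeans(Xmat*c(vx*Y*(Z/eX^2+(1-Z)/(1-eX)^2)))
J.hat[2:3,2:3]<-crossprod(Xmat*sqrt(vx))/N
V.hat<-solve(J.hat) %*% I.hat %*% t(solve(J.hat))/N
sqrt(V.hat[1,1])                               #estimated standard error of psi.hat
@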
\pagebreak

<<>>=
#IPW hat 2(a): PS known
w0<-(1-Z)/(1-eX0)
w1<-Z/eX0
w<-w0+w1
ipw.ests<-coef(lm(Y~Z,weights=w))
res1<-Y-ipw.ests[1]-ipw.ests[2]*Z
R<-w
I.n<-matrix(0,2,2)
I.n[1,1]<-mean(R^2*res1^2)
I.n[1,2]<-I.n[2,1]<-mean(R^2*Z*res1^2)
I.n[2,2]<-mean(R^2*Z^2*res1^2)
round(I.n,6)
J.n<-matrix(0,2,2)
J.n[1,1]<-mean(R)
J.n[1,2]<-mean(R*Z)
J.n[2,1]<-mean(R*Z)
J.n[2,2]<-mean(R*Z^2)
round(J.n,6)
#This should match the 2(a) variance
H.var<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
H.var[2,2]
var(ate.ests[,3])
############################################################
#IPW hat 2(b): PS unknown
w0<-(1-Z)/(1-eX)
w1<-Z/eX
w<-w0+w1
ipw.ests<-coef(lm(Y~Z,weights=w))
res1<-Y-ipw.ests[1]-ipw.ests[2]*Z
res2<-Z-eX
R<-w
Rdot1<-((1-Z)/(1-eX))*eX-(Z/eX)*(1-eX)
Rdot2<-X1*Rdot1
I.n<-matrix(0,4,4)
I.n[1,1]<-mean(R^2*res1^2)
I.n[1,2]<-I.n[2,1]<-mean(R^2*Z*res1^2)
I.n[1,3]<-I.n[3,1]<-mean(R*res2*res1)
I.n[1,4]<-I.n[4,1]<-mean(X1*R*res2*res1)
I.n[2,2]<-mean(R^2*Z^2*res1^2)
I.n[2,3]<-I.n[3,2]<-mean(R*Z*res2*res1)
I.n[2,4]<-I.n[4,2]<-mean(X1*R*Z*res2*res1)
I.n[3,3]<-mean(res2^2)
I.n[3,4]<-I.n[4,3]<-mean(X1*res2^2)
I.n[4,4]<-mean(X1^2*res2^2)
round(I.n,6)
J.n<-matrix(0,4,4)
J.n[1,1]<-mean(R)
J.n[1,2]<-mean(R*Z)
J.n[1,3]<-mean(-Rdot1*res1)
J.n[1,4]<-mean(-Rdot2*res1)
J.n[2,1]<-mean(R*Z)
J.n[2,2]<-mean(R*Z^2)
J.n[2,3]<-mean(-Rdot1*Z*res1)
J.n[2,4]<-mean(-Rdot2*Z*res1)
J.n[3,3]<-mean(res2^2)
J.n[3,4]<-J.n[4,3]<-mean(X1*res2^2)
J.n[4,4]<-mean(X1^2*res2^2)
round(J.n,6)
#This should match the 2(b) variance
H.var<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
H.var[2,2]
var(ate.ests[,4])
@

\pagebreak

<<>>=
#G-estimation variance 3(a): known PS
I.n<-J.n<-matrix(0,1,1)
res1<-(Y-be[2]*Z)
res2<-(Z-eX0)
I.n[1,1]<-mean(res1^2*res2^2)
J.n[1,1]<-mean(Z*res2)
#This should match the 3(a) variance
G.var<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
G.var
var(ate.ests[,5])
############################################################
#G-estimation variance 3(b): unknown PS
I.n<-J.n<-matrix(0,3,3)
res1<-(Y-be[2]*Z)
res2<-(Z-eX)
I.n[1,1]<-mean(res1^2*res2^2)
I.n[1,2]<-I.n[2,1]<-mean(res2^2*res1)
I.n[2,2]<-mean(res2^2)
I.n[1,3]<-I.n[3,1]<-mean(X1*res2^2*res1)
I.n[2,3]<-I.n[3,2]<-mean(X1*res2^2)
I.n[3,3]<-mean(X1^2*res2^2)
I.n
vx<-eX*(1-eX)
J.n[1,1]<-mean(Z*res2)
J.n[1,2]<-mean(vx*res1)
J.n[2,2]<-mean(vx)
J.n[1,3]<-mean(vx*X1*res1)
J.n[2,3]<-J.n[3,2]<-mean(vx*X1)
J.n[3,3]<-mean(vx*X1^2)
J.n
#This should match the 3(b) variance
G.var<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
G.var[1,1]
G.var[1,1]-G.var[1,-1] %*% solve(G.var[-1,-1]) %*% G.var[-1,1]
var(ate.ests[,6])
@

\end{document}