\documentclass[notitlepage]{article}
\usepackage{Math}
%\usepackage{../../Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bm}
\usepackage{bbm}
\usepackage{verbatim}
\usepackage{mathtools}
\usepackage{dsfont}
\usepackage{scalerel}
\usepackage{amsfonts,amsmath,amssymb}
\usepackage{tikz}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}
\usepackage{pgfplots}
\pgfplotsset{compat=1.17}
\newcommand*\circled[1]{\tikz[baseline=(char.base)]{
\node[shape=circle,draw,inner sep=2pt] (char) {#1};}}
\usepackage{xcolor}
\newtheorem{alg}{Algorithm}
\newtheorem{ex}{Example}
\newtheorem{note}{Note}
\newenvironment{proof}{\par\noindent{\normalfont{\bf{Proof }}}}
{\hfill \small$\blacksquare$\\[2mm]}
\setlength{\parindent}{0in}
\def\E{\mathbbmss{E}}
\def\Var{\text{Var}}
\def\Cov{\text{Cov}}
\def\Corr{\text{Corr}}
\def\bW{\mathbf{W}}
\def\bH{\mathbf{H}}
\newcommand{\bs}[1]{\bm{#1}}
\newcommand{\Rho}{\mathrm{P}}
\newcommand{\cif}{\text{ if }}
\newcommand{\Ra}{\Longrightarrow}
\def\bphi{\boldsymbol \phi}
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}
\def\Xb{\mathbf{X}_{\beta}}
\def\Xp{\mathbf{X}_{\psi}}
\def\beps{\boldsymbol{\epsilon}}
\def\betahat{\widehat \beta}
\def\psihat{\widehat \psi}
\begin{document}

<<>>=
library(knitr)
# global chunk options
#opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=6)
options(repos=c(CRAN="https://cloud.r-project.org/"))
knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)})
@

\begin{center}
{\textsclarge{Propensity score adjustment with parameter estimation}}
\end{center}

\tikzset{
-Latex,auto,node distance =1 cm and 1 cm,semithick,
state/.style ={circle, draw, minimum width = 0.7 cm},
box/.style ={rectangle, draw, minimum width = 0.7 cm, fill=lightgray},
point/.style = {circle, draw, inner sep=0.08cm,fill,node contents={}},
bidirected/.style={Latex-Latex,dashed},
el/.style = {inner sep=3pt, align=left, sloped}
}

Consider the data generating (structural) conditional mean model for a binary treatment
\[
\E_{Y|X,Z}^\calO[Y|X=x,Z=z] = 2 + 4 x + x^2 + 3 z
\]
and propensity score adjusted estimation of the ATE, which in this case equals $\psi_0 = 3$. Here $X \sim Normal(\mu_X,\sigma_X^2)$ is a confounder (the simulations below use $\mu_X = 2$ and $\sigma_X = 0.5$), and the conditional model for $Z|X=x$ is $Bernoulli(e(x))$ with
\[
e(x) = \frac{\exp\{-3.5 + 2 x\}}{1+\exp\{-3.5 + 2 x\}}.
\]
In the analysis, the propensity score is modelled parametrically with parameter $\alpha$, which is estimated via logistic regression.
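To verify the stated value of $\psi_0$: the treatment effect in this structural model is constant in $x$, since
\[
\E_{Y|X,Z}^\calO[Y|X=x,Z=1] - \E_{Y|X,Z}^\calO[Y|X=x,Z=0] = (2 + 4 x + x^2 + 3) - (2 + 4 x + x^2) = 3,
\]
so, as $X$ is the only confounder, averaging over the distribution of $X$ yields $\psi_0 = 3$ regardless of $\mu_X$ and $\sigma_X$.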
<<>>=
#Calculation for large N
library(mvnfast)
set.seed(22087)
N<-1000000
muX<-2;sigX<-0.5
X1<-rnorm(N,muX,sigX)
al<-c(-3.5,2)
expit<-function(x){return(1/(1+exp(-x)))}
Xm<-cbind(1,X1)
be<-c(2,3,4,1)
sigY<-3
ps.true<-expit(Xm %*% al)
Z<-rbinom(N,1,ps.true)
Xb<-cbind(1,Z,X1,X1^2)
Y<-Xb %*% be + rnorm(N)*sigY
par(mar=c(4,4,2,0))
hist(ps.true,breaks=seq(0,1,by=0.02),main='True propensity score',xlab=expression(e(x)))
box()
@

It is easy to see that a correctly specified regression model estimates the average treatment effect (ATE) correctly, and that a mis-specified model does not.

<<>>=
#Correct model
coef(summary(lm(Y~Z+X1+I(X1^2))))
#Incorrect model
coef(summary(lm(Y~Z)))
@

\bigskip

\textbf{Propensity score regression:} Consider OLS estimation using the (mis-specified) mean model
\[
\E_{Y|X,Z}^\calO[Y|X=x,Z=z] = \beta_0 + z \psi_0 + e(x;\widehat \alpha) \phi_0.
\]

<<>>=
#PS regression: Correct PS model
eX<-fitted(glm(Z~X1,family=binomial))
coef(summary(lm(Y~Z+eX)))
@

This fit is equivalent to solving the OLS (normal) equations
\[
\sum_{i=1}^n \begin{pmatrix} 1 \\[6pt] z_i \\[6pt] e(x_i;\widehat \alpha) \end{pmatrix} (y_i - \beta_0 - z_i \psi_0 - e(x_i;\widehat \alpha) \phi_0) = \begin{pmatrix} 0 \\[6pt] 0 \\[6pt] 0\end{pmatrix}
\]
or equivalently
\[
\sum_{i=1}^n \begin{pmatrix} 1 \\[6pt] (z_i-e(x_i;\widehat \alpha)) \\[6pt] e(x_i;\widehat \alpha) \end{pmatrix} (y_i - \beta_0 - z_i \psi_0 - e(x_i;\widehat \alpha) \phi_0) = \begin{pmatrix} 0 \\[6pt] 0 \\[6pt] 0\end{pmatrix}
\]
to yield
\[
\begin{pmatrix} \widehat \beta_0 \\[6pt] \widehat \psi_0 \\[6pt] \widehat \phi_0 \end{pmatrix} = (\bX^\top \bX)^{-1} \bX^\top \by
\qquad \qquad
\bX = \begin{pmatrix} 1 & z_1 & e(x_1;\widehat \alpha)\\[3pt] 1 & z_2 & e(x_2;\widehat \alpha)\\[3pt] \vdots & \vdots & \vdots\\[3pt] 1 & z_n & e(x_n;\widehat \alpha) \end{pmatrix}.
\]

<<>>=
Xm<-cbind(1,Z,eX)
solve(t(Xm) %*% Xm) %*% t(Xm) %*% Y
@

However, an alternative \textbf{G-estimating} form is
\[
\sum_{i=1}^n \begin{pmatrix} 1 \\ (z_i-e(x_i;\widehat \alpha)) \end{pmatrix} (y_i - \beta_0 - z_i \psi_0) = \begin{pmatrix} 0 \\[6pt] 0 \end{pmatrix}
\]
that is, using the estimating (score) function
\[
\bZ_1^\top (\by - \bZ_2 \theta) = \zerovec
\]
say, where $\theta = (\beta_0, \psi_0)$ and
\[
\bZ_1 = \begin{pmatrix} 1 & z_1 - e(x_1;\widehat \alpha) \\[3pt] 1 & z_2 - e(x_2;\widehat \alpha) \\[3pt] \vdots & \vdots \\[3pt] 1 & z_n - e(x_n;\widehat \alpha) \end{pmatrix}
\qquad
\bZ_2 = \begin{pmatrix} 1 & z_1 \\[3pt] 1 & z_2 \\[3pt] \vdots & \vdots \\[3pt] 1 & z_n \end{pmatrix}.
\]
The solution is therefore
\[
\begin{pmatrix} \widehat \beta_0 \\[6pt] \widehat \psi_0 \end{pmatrix} = (\bZ_1^\top \bZ_2)^{-1} \bZ_1^\top \by.
\]

<<>>=
Zm1<-cbind(1,Z-eX)
Zm2<-cbind(1,Z)
g.ests<-solve(t(Zm1) %*% Zm2) %*% t(Zm1) %*% Y
g.ests
@

We can incorporate the estimation of the propensity score parameters into the same estimating equation formulation by adding in the estimating equation for $\alpha$, namely, for the logistic regression model,
\[
\sum_{i=1}^n \bx_i^\top (z_i - e(x_i;\alpha)) = \zerovec.
\]

\medskip

We can compute an estimate of the variance-covariance matrix using the theory of estimating equations.
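Before doing so, a quick numerical check (not part of the original derivation): the logistic regression fit used to construct \texttt{eX} above already solves the $\alpha$ estimating equation displayed above; the two column sums below should be zero to numerical precision.

<<>>=
#Check: logistic regression score equations for alpha are solved at the fitted values
#(both sums should be numerically zero)
colSums(cbind(1,X1)*(Z-eX))
@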
For the $p \times 1$ system of estimating equations
\[
\sum_{i=1}^n \bU_i(\theta) = \zerovec
\]
with $\E[\bU(\theta_0)] = \zerovec$, we have that
\[
\sqrt{n}(\widehat \theta_n - \theta_0) \CiD Normal_p(\zerovec,\bV)
\]
where
\[
\bV = \mathcal{J}^{-1} \mathcal{I} \mathcal{J}^{-\top}
\]
with
\[
\mathcal{I} = \E[ \bU(\theta_0) \bU(\theta_0)^\top] \qquad \qquad \mathcal{J} = \E[-\dot{\bU}(\theta_0)]
\]
both $(p \times p)$ matrices, and
\[
\dot{\bU}(\theta_0) = \dfrac{\partial \bU(\theta)}{\partial \theta^\top} \bigg |_{\theta = \theta_0}.
\]
We proceed by computing estimates, $\widehat I_n$ and $\widehat J_n$, of the two matrices based on the observed data, and then computing
\[
\widehat \bV = \widehat J_n^{-1} \widehat I_n \widehat J_n^{-\top}
\]
to estimate the asymptotic variance; this approach is known as \textit{sandwich} or \textit{robust} variance estimation.

\begin{itemize}

\item For \textbf{propensity score regression} we stack the estimating equations
\[
\sum_{i=1}^n \begin{pmatrix} (y_i - \beta_0 - z_i \psi_0 - e(x_i; \alpha) \phi_0) \\[6pt] z_i (y_i - \beta_0 - z_i \psi_0 - e(x_i; \alpha) \phi_0) \\[6pt] e(x_i; \alpha) (y_i - \beta_0 - z_i \psi_0 - e(x_i; \alpha) \phi_0) \\[6pt] \bx_i^\top (z_i - e(x_i; \alpha)) \end{pmatrix} = \begin{pmatrix} 0 \\[6pt] 0 \\[6pt] 0 \\[6pt] 0 \end{pmatrix}
\]
and then compute $\mathcal{I}$ and $\mathcal{J}$, and their estimates, in this extended system. The score vector is
\[
\bU(\theta) = \begin{pmatrix} (Y - \beta_0 - Z \psi_0 - e(X; \alpha) \phi_0) \\[6pt] Z (Y - \beta_0 - Z \psi_0 - e(X; \alpha) \phi_0) \\[6pt] e(X; \alpha) (Y - \beta_0 - Z \psi_0 - e(X; \alpha) \phi_0) \\[6pt] \bX^\top (Z - e(X; \alpha)) \end{pmatrix} = \begin{pmatrix} \epsilon_Y \\[6pt] Z \epsilon_Y \\[6pt] e \epsilon_Y \\[6pt] \bX^\top \epsilon_Z \end{pmatrix}
\]
say, denoting $e \equiv e(X;\alpha)$, where $\bX = (1, X)$, and
\[
\epsilon_Y = (Y - \beta_0 - Z \psi_0 - e(X; \alpha) \phi_0) \qquad \qquad \epsilon_Z = (Z - e(X; \alpha)).
\]
Then
\[
\mathcal{I} = \E \begin{bmatrix} \epsilon_Y^2 & Z \epsilon_Y^2 & e \epsilon_Y^2 & \bX \epsilon_Z \epsilon_Y \\[6pt] Z \epsilon_Y^2 & Z^2 \epsilon_Y^2 & Z e \epsilon_Y^2 & \bX Z \epsilon_Z \epsilon_Y \\[6pt] e \epsilon_Y^2 & Z e\epsilon_Y^2 & e^2 \epsilon_Y^2 & \bX e \epsilon_Z \epsilon_Y \\[6pt] \bX^\top \epsilon_Z \epsilon_Y & \bX^\top Z \epsilon_Z \epsilon_Y & \bX^\top e \epsilon_Z \epsilon_Y & \bX^\top \bX \epsilon_Z^2 \end{bmatrix}
\qquad
\mathcal{J} = \E \begin{bmatrix} 1 & Z & e & \bX e(1-e) \phi_0 \\[6pt] Z & Z^2 & Z e & Z \bX e(1-e) \phi_0 \\[6pt] e & Z e & e^2 & \bX e(1-e) (e \phi_0 - \epsilon_Y) \\[6pt] \zerovec & \zerovec & \zerovec & \bX^\top \bX e (1-e) \end{bmatrix}.
\]

<<>>=
#Propensity score regression
psr.ests<-coef(lm(Y~Z+eX))
res1<-Y-psr.ests[1]-psr.ests[2]*Z - psr.ests[3]*eX
res2<-Z-eX
eterm<-eX*(1-eX)
dterm<-eX*psr.ests[3]-res1
Xm1<-cbind(1,Z,eX) * matrix(res1,nrow=N,ncol=3)
Xm2<-cbind(1,X1) * matrix(res2,nrow=N,ncol=2)
Xm<-cbind(Xm1,Xm2)
I.n<-matrix((t(Xm) %*% Xm)/N,5,5)
round(I.n,6)
J.n<-matrix(0,5,5)
J.n[1:3,1:3]<-crossprod(cbind(1,Z,eX))/N
J.n[1,4:5]<-c(mean(eterm*psr.ests[3]),mean(X1*eterm*psr.ests[3]))
J.n[2,4:5]<-c(mean(Z*eterm*psr.ests[3]),mean(Z*X1*eterm*psr.ests[3]))
J.n[3,4:5]<-c(mean(eterm*dterm),mean(X1*eterm*dterm))
J.n[4:5,4:5]<-t(cbind(1,X1) * matrix(eterm,nrow=N,ncol=2)) %*% cbind(1,X1)/N
round(J.n,6)
V1<-solve(J.n) %*% (I.n %*% t(solve(J.n)))
round(V1,6)
@

Thus the asymptotic variance of $\widehat \psi_0$ (that is, the marginal asymptotic variance of $\sqrt{n}(\widehat \psi_0 - \psi_0^\true)$) under propensity score regression is approximately $\Sexpr{V1[2,2]}$.

\bigskip

\item For \textbf{G-estimation} we stack the estimating equations
\[
\sum_{i=1}^n \begin{pmatrix} (y_i - \beta_0 - z_i \psi_0) \\[6pt] (z_i-e(x_i; \alpha))(y_i - \beta_0 - z_i \psi_0) \\[6pt] \bx_i^\top (z_i - e(x_i; \alpha)) \end{pmatrix} = \begin{pmatrix} 0 \\[6pt] 0 \\[6pt] 0 \end{pmatrix}.
\]
The score vector is
\[
\bU(\theta) = \begin{pmatrix} (Y - \beta_0 - Z \psi_0) \\[6pt] (Z-e(X; \alpha))(Y - \beta_0 - Z \psi_0) \\[6pt] \bX^\top (Z - e(X; \alpha)) \end{pmatrix} = \begin{pmatrix} \varepsilon_Y \\[6pt] \epsilon_Z \varepsilon_Y \\[6pt] \bX^\top \epsilon_Z \end{pmatrix}
\]
where
\[
\varepsilon_Y = (Y - \beta_0 - Z \psi_0).
\]
Therefore
\[
\mathcal{I} = \E \begin{bmatrix} \varepsilon_Y^2 & \epsilon_Z \varepsilon_Y^2 & \bX \epsilon_Z \varepsilon_Y \\[6pt] \epsilon_Z \varepsilon_Y^2 & \epsilon_Z^2 \varepsilon_Y^2 & \bX \epsilon_Z^2 \varepsilon_Y \\[6pt] \bX^\top \epsilon_Z \varepsilon_Y & \bX^\top \epsilon_Z^2 \varepsilon_Y & \bX^\top \bX \epsilon_Z^2 \end{bmatrix}
\qquad \qquad
\mathcal{J} = \E \begin{bmatrix} 1 & Z & \zerovec \\[6pt] \epsilon_Z & Z \epsilon_Z & \bX e (1-e) \varepsilon_Y\\[6pt] \zerovec & \zerovec & \bX^\top \bX e (1-e) \end{bmatrix}.
\]
Now, the first component of the score vector, $\varepsilon_Y$, is required to have expectation zero; also, under the assumed outcome model its conditional distribution given $(X,Z)$ does not depend on $Z$. Therefore,
\[
\E_{Y|X,Z}[\varepsilon_Y | X,Z] = h(X) \qquad \text{with} \qquad \E_X[h(X)] = 0
\]
and
\[
\E_{Y|X,Z}[\varepsilon_Y^2 | X,Z] = v(X).
\]
Hence by iterated expectation we must have that
\[
\E_{X,Y,Z}[\varepsilon_Y] = 0.
\]
Similarly we require by the estimating equation for $\alpha$ that
\[
\E_{Z|X}[\epsilon_Z | X] = 0
\]
and hence
\[
\E_{X,Y,Z}[\epsilon_Z] = \E_{X,Y,Z}[\epsilon_Z \varepsilon_Y^2] = 0 \qquad \E_{X,Y,Z}[\bX \epsilon_Z \varepsilon_Y] = \zerovec.
\]
We also know that
\[
\E_{Z|X}[\epsilon_Z^2 | X] = \E_{Z|X}[(Z-e(X))^2 | X] = e(X) (1-e(X)).
\]
Thus
\[
\mathcal{I} = \E \begin{bmatrix} v(X) & 0 & 0 & 0 \\[6pt] 0 & e(1-e) v(X) & e(1-e) h(X) & e(1-e) X h(X) \\[6pt] 0 & e(1-e) h(X) & e(1-e) & e(1-e) X \\[6pt] 0 & e(1-e) X h(X) & e(1-e) X & e(1-e) X^2 \end{bmatrix} = \begin{bmatrix} \mathcal{I}_{11} & \mathcal{I}_{12} \\[6pt] \mathcal{I}_{21} & \mathcal{I}_{22} \end{bmatrix}
\]
and
\[
\mathcal{J} = \E \begin{bmatrix} 1 & e & 0 & 0 \\[6pt] 0 & e(1-e) & e (1-e) h(X) & e (1-e) X h(X) \\[6pt] 0 & 0 & e (1-e) & e (1-e) X \\[6pt] 0 & 0 & e (1-e) X & e (1-e) X^2 \end{bmatrix} = \begin{bmatrix} \mathcal{J}_{11} & \mathcal{J}_{12} \\[6pt] \zerovec & \mathcal{J}_{22} \end{bmatrix}.
\]
On multiplying out, and noting that $\mathcal{I}_{12} = \mathcal{J}_{12}$ and $\mathcal{I}_{22} = \mathcal{J}_{22}$, we can conclude that $\bV = \mathcal{J}^{-1} \mathcal{I} \mathcal{J}^{-\top}$ is \textbf{block diagonal}, that is
\[
\bV = \begin{bmatrix} \bV_{11} & \zerovec \\ \zerovec & \bV_{22} \end{bmatrix}
\]
from which we can conclude that asymptotically $(\widehat \beta_0, \widehat \psi_0) \independent \widehat \alpha$, that is, the estimators are \textbf{independent}.

<<>>=
Zm1<-cbind(1,Z-eX)
Zm2<-cbind(1,Z)
g.ests<-solve(t(Zm1) %*% Zm2) %*% t(Zm1) %*% Y
res1<-Y-g.ests[1]-g.ests[2]*Z
res2<-Z-eX
I.n<-matrix(0,4,4)
I.n[1,1]<-mean(res1^2)
I.n[1,2]<-I.n[2,1]<-mean(res2*res1^2)
I.n[1,3]<-I.n[3,1]<-mean(res2*res1)
I.n[1,4]<-I.n[4,1]<-mean(X1*res2*res1)
I.n[2,2]<-mean(res2^2*res1^2)
I.n[2,3]<-I.n[3,2]<-mean(res2^2*res1)
I.n[2,4]<-I.n[4,2]<-mean(X1*res2^2*res1)
I.n[3,3]<-mean(res2^2)
I.n[3,4]<-I.n[4,3]<-mean(X1*res2^2)
I.n[4,4]<-mean(X1^2*res2^2)
round(I.n,6)
J.n<-matrix(0,4,4)
J.n[1,1]<-1
J.n[1,2]<-mean(Z)
J.n[2,1]<-mean(res2)
J.n[2,2]<-mean(Z*res2)
J.n[2,3]<-mean(eX*(1-eX)*res1)
J.n[2,4]<-mean(X1*eX*(1-eX)*res1)
J.n[3,3]<-mean(eX*(1-eX))
J.n[3,4]<-J.n[4,3]<-mean(X1*eX*(1-eX))
J.n[4,4]<-mean(X1^2*eX*(1-eX))
round(J.n,6)
V2<-solve(J.n) %*% (I.n %*% t(solve(J.n)))
round(V2,6)
@

\bigskip

\item \textbf{Inverse Probability Weighting:} The original IPW estimators are
\[
\widetilde \mu_{\text{IPW}} (0) = \dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{(1-Z_i) Y_i}{1-e(X_i)} \qquad \qquad \widetilde \mu_{\text{IPW}}(1) = \dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{Z_i Y_i}{e(X_i)}
\]
and in standardized weight form
\[
\widehat \mu_{\text{IPW}} (0) = \dfrac{\dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{(1-Z_i) Y_i}{1-e(X_i)} }{\dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{(1-Z_i)}{1-e(X_i)}} \qquad \qquad \widehat \mu_{\text{IPW}}(1) = \dfrac{\dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{Z_i Y_i}{e(X_i)}}{\dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{Z_i}{e(X_i)}}.
\]
The standardized weight estimators arise from the fit of a weighted least squares problem; if we denote $\beta_0 = \mu (0)$ and $\psi_0 = \mu(1) - \mu(0)$, with $\theta = (\beta_0,\psi_0)$, then
\[
(\widehat \beta_0, \widehat \psi_0) = \arg \min_{(\beta_0,\psi_0)} \sum_{i=1}^n R_i (Y_i - \beta_0 - Z_i \psi_0)^2
\]
where for each $i$
\[
R_i = \dfrac{(1-Z_i)}{1-e(X_i)} + \dfrac{Z_i}{e(X_i)} = R_{0i} + R_{1i}
\]
say.

\bigskip

The corresponding estimating equations are
\[
\sum_{i=1}^n \begin{pmatrix} r_i (y_i - \beta_0 - z_i \psi_0) \\[6pt] r_i z_i (y_i - \beta_0 - z_i \psi_0) \\[6pt] \bx_i^\top (z_i - e(x_i; \alpha)) \end{pmatrix} = \begin{pmatrix} 0 \\[6pt] 0 \\[6pt] 0 \end{pmatrix}.
\]
The score vector is
\[
\bU(\theta) = \begin{pmatrix} R (Y - \beta_0 - Z \psi_0) \\[6pt] R Z (Y - \beta_0 - Z \psi_0) \\[6pt] \bX^\top (Z - e(X; \alpha)) \end{pmatrix} = \begin{pmatrix} R \varepsilon_Y \\[6pt] R Z \varepsilon_Y \\[6pt] \bX^\top \epsilon_Z \end{pmatrix}.
\]
Therefore
\[
\mathcal{I} = \E \begin{bmatrix} R^2 \varepsilon_Y^2 & R^2 Z \varepsilon_Y^2 & \bX R \epsilon_Z \varepsilon_Y \\[6pt] R^2 Z \varepsilon_Y^2 & R^2 Z^2 \varepsilon_Y^2 & \bX R Z \epsilon_Z \varepsilon_Y \\[6pt] \bX^\top R \epsilon_Z \varepsilon_Y & \bX^\top R Z \epsilon_Z \varepsilon_Y & \bX^\top \bX \epsilon_Z^2 \end{bmatrix}
\qquad \qquad
\mathcal{J} = \E \begin{bmatrix} R & R Z & -\dot{\bR}^\top \varepsilon_Y \\[6pt] R Z & RZ^2 & -\dot{\bR}^\top Z \varepsilon_Y \\[6pt] \zerovec & \zerovec & \bX^\top \bX e (1-e) \end{bmatrix}
\]
where
\[
\dot{\bR} = \frac{d R}{d \alpha} = \frac{1-Z}{1-e(X)} e(X) \bX^\top - \frac{Z}{e(X)} (1-e(X)) \bX^\top.
\]

<<>>=
eX<-fitted(glm(Z~X1,family=binomial))
w0<-(1-Z)/(1-eX); W0<-w0/sum(w0)
w1<-Z/eX; W1<-w1/sum(w1)
w<-w0+w1
#IPW via original weights
mean(w1*Y)-mean(w0*Y)
#IPW via Standardized weights
sum(W1*Y)-sum(W0*Y)
#IPW via regression
fit.w<-lm(Y~Z,weights=w)
coef(summary(fit.w))
@

<<>>=
ipw.ests<-coef(fit.w)
res1<-Y-ipw.ests[1]-ipw.ests[2]*Z
res2<-Z-eX
R<-w
Rdot1<-((1-Z)/(1-eX))*eX-(Z/eX)*(1-eX)
Rdot2<-X1*Rdot1
I.n<-matrix(0,4,4)
I.n[1,1]<-mean(R^2*res1^2)
I.n[1,2]<-I.n[2,1]<-mean(R^2*Z*res1^2)
I.n[1,3]<-I.n[3,1]<-mean(R*res2*res1)
I.n[1,4]<-I.n[4,1]<-mean(X1*R*res2*res1)
I.n[2,2]<-mean(R^2*Z^2*res1^2)
I.n[2,3]<-I.n[3,2]<-mean(R*Z*res2*res1)
I.n[2,4]<-I.n[4,2]<-mean(X1*R*Z*res2*res1)
I.n[3,3]<-mean(res2^2)
I.n[3,4]<-I.n[4,3]<-mean(X1*res2^2)
I.n[4,4]<-mean(X1^2*res2^2)
round(I.n,6)
J.n<-matrix(0,4,4)
J.n[1,1]<-mean(R)
J.n[1,2]<-mean(R*Z)
J.n[1,3]<-mean(-Rdot1*res1)
J.n[1,4]<-mean(-Rdot2*res1)
J.n[2,1]<-mean(R*Z)
J.n[2,2]<-mean(R*Z^2)
J.n[2,3]<-mean(-Rdot1*Z*res1)
J.n[2,4]<-mean(-Rdot2*Z*res1)
J.n[3,3]<-mean(res2^2)
J.n[3,4]<-J.n[4,3]<-mean(X1*res2^2)
J.n[4,4]<-mean(X1^2*res2^2)
round(J.n,6)
V3<-solve(J.n) %*% (I.n %*% t(solve(J.n)))
round(V3,6)
@

Again, we note that the matrix is block-diagonal, and therefore that asymptotically $(\widehat \beta_0, \widehat \psi_0) \independent \widehat \alpha$, that is, the estimators are \textbf{independent}.

\end{itemize}

\pagebreak

<<>>=
#Monte Carlo study
nreps<-20000
n<-1000
psr.ests<-matrix(0,nrow=nreps,ncol=3)
g.ests<-ipw.ests<-al.ests<-matrix(0,nrow=nreps,ncol=2)
V1.ests<-array(0,c(nreps,5,5))
V2.ests<-V3.ests<-array(0,c(nreps,4,4))
for(irep in 1:nreps){
  X1<-rnorm(n,muX,sigX)
  Xm<-cbind(1,X1)
  ps.true<-expit(Xm %*% al)
  Z<-rbinom(n,1,ps.true)
  Xb<-cbind(1,Z,X1,X1^2)
  Y<-Xb %*% be + rnorm(n)*sigY
  fit.p<-glm(Z~X1,family=binomial)
  eX<-fitted(fit.p)
  w0<-(1-Z)/(1-eX)
  W0<-w0/sum(w0)
  w1<-Z/eX
  W1<-w1/sum(w1)
  w<-w0+w1
  psr.fit<-coef(lm(Y~Z+eX))
  psr.ests[irep,]<-psr.fit
  ipw.ests[irep,]<-coef(summary(lm(Y~Z,weights=w)))[1:2]
  al.ests[irep,]<-coef(fit.p)
  Zm1<-cbind(1,Z-eX)
  Zm2<-cbind(1,Z)
  g.ests[irep,]<-solve(t(Zm1) %*% Zm2) %*% t(Zm1) %*% Y
  #PSR variance estimate
  res1<-Y-psr.fit[1]-psr.fit[2]*Z - psr.fit[3]*eX
  res2<-Z-eX
  eterm<-eX*(1-eX)
  dterm<-eX*psr.fit[3]-res1
  Xm1<-cbind(1,Z,eX) * matrix(res1,nrow=n,ncol=3)
  Xm2<-cbind(1,X1) * matrix(res2,nrow=n,ncol=2)
  Xm<-cbind(Xm1,Xm2)
  I.n<-matrix((t(Xm) %*% Xm)/n,5,5)
  J.n<-matrix(0,5,5)
  J.n[1:3,1:3]<-crossprod(cbind(1,Z,eX))/n
  J.n[1,4:5]<-c(mean(eterm*psr.fit[3]),mean(X1*eterm*psr.fit[3]))
  J.n[2,4:5]<-c(mean(Z*eterm*psr.fit[3]),mean(Z*X1*eterm*psr.fit[3]))
  J.n[3,4:5]<-c(mean(eterm*dterm),mean(X1*eterm*dterm))
  J.n[4:5,4:5]<-t(cbind(1,X1) * matrix(eterm,nrow=n,ncol=2)) %*% cbind(1,X1)/n
  V1.ests[irep,,]<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
  #G-estimation variance estimate
  res1<-Y-g.ests[irep,1]-g.ests[irep,2]*Z
  res2<-Z-eX
  I.n<-matrix(0,4,4)
  I.n[1,1]<-mean(res1^2)
  I.n[1,2]<-I.n[2,1]<-mean(res2*res1^2)
  I.n[1,3]<-I.n[3,1]<-mean(res2*res1)
  I.n[1,4]<-I.n[4,1]<-mean(X1*res2*res1)
  I.n[2,2]<-mean(res2^2*res1^2)
  I.n[2,3]<-I.n[3,2]<-mean(res2^2*res1)
  I.n[2,4]<-I.n[4,2]<-mean(X1*res2^2*res1)
  I.n[3,3]<-mean(res2^2)
  I.n[3,4]<-I.n[4,3]<-mean(X1*res2^2)
  I.n[4,4]<-mean(X1^2*res2^2)
  J.n<-matrix(0,4,4)
  J.n[1,1]<-1
  J.n[1,2]<-mean(Z)
  J.n[2,1]<-mean(res2)
  J.n[2,2]<-mean(Z*res2)
  J.n[2,3]<-mean(eX*(1-eX)*res1)
  J.n[2,4]<-mean(X1*eX*(1-eX)*res1)
  J.n[3,3]<-mean(eX*(1-eX))
  J.n[3,4]<-J.n[4,3]<-mean(X1*eX*(1-eX))
  J.n[4,4]<-mean(X1^2*eX*(1-eX))
  V<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
  V2.ests[irep,,]<-V
  #IPW Variance estimate
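  #(added note) this mirrors the single-sample IPW sandwich calculation above;
  #the alpha-columns of rows 1-2 of J.n carry the -Rdot terms because the weights R depend on alpha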
  res1<-Y-ipw.ests[irep,1]-ipw.ests[irep,2]*Z
  res2<-Z-eX
  R<-w
  Rdot1<-((1-Z)/(1-eX))*eX-(Z/eX)*(1-eX)
  Rdot2<-X1*Rdot1
  I.n<-matrix(0,4,4)
  I.n[1,1]<-mean(R^2*res1^2)
  I.n[1,2]<-I.n[2,1]<-mean(R^2*Z*res1^2)
  I.n[1,3]<-I.n[3,1]<-mean(R*res2*res1)
  I.n[1,4]<-I.n[4,1]<-mean(X1*R*res2*res1)
  I.n[2,2]<-mean(R^2*Z^2*res1^2)
  I.n[2,3]<-I.n[3,2]<-mean(R*Z*res2*res1)
  I.n[2,4]<-I.n[4,2]<-mean(X1*R*Z*res2*res1)
  I.n[3,3]<-mean(res2^2)
  I.n[3,4]<-I.n[4,3]<-mean(X1*res2^2)
  I.n[4,4]<-mean(X1^2*res2^2)
  J.n<-matrix(0,4,4)
  J.n[1,1]<-mean(R)
  J.n[1,2]<-mean(R*Z)
  J.n[1,3]<-mean(-Rdot1*res1)
  J.n[1,4]<-mean(-Rdot2*res1)
  J.n[2,1]<-mean(R*Z)
  J.n[2,2]<-mean(R*Z^2)
  J.n[2,3]<-mean(-Rdot1*Z*res1)
  J.n[2,4]<-mean(-Rdot2*Z*res1)
  J.n[3,3]<-mean(res2^2)
  J.n[3,4]<-J.n[4,3]<-mean(X1*res2^2)
  J.n[4,4]<-mean(X1^2*res2^2)
  V<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
  V3.ests[irep,,]<-V
}
@

<<>>=
par(mar=c(4,4,2,0))
psi0.ests<-cbind(psr.ests[,2],g.ests[,2],ipw.ests[,2])
colnames(psi0.ests)<-c('PSR','G-est','IPW')
boxplot(psi0.ests,names=c('PSR','G-est','IPW'),pch=19,cex=0.5)
abline(h=3,col='red',lty=2)
@

<<>>=
#Variances
apply(psi0.ests,2,var)
@

<<>>=
#Propensity score regression variances
round(apply(V1.ests,2:3,mean),6)      #Mean of sandwich estimates across replications
round(cov(cbind(psr.ests,al.ests)),6) #Empirical (Monte Carlo) covariance of the estimates
round(V1/n,6)                         #Sandwich estimate from the large-sample fit, scaled to n
#G-estimation regression variances
round(apply(V2.ests,2:3,mean),6)      #Mean of sandwich estimates across replications
round(cov(cbind(g.ests,al.ests)),6)   #Empirical (Monte Carlo) covariance of the estimates
round(V2/n,6)                         #Sandwich estimate from the large-sample fit, scaled to n
#IPW variances
round(apply(V3.ests,2:3,mean),6)      #Mean of sandwich estimates across replications
round(cov(cbind(ipw.ests,al.ests)),6) #Empirical (Monte Carlo) covariance of the estimates
round(V3/n,6)                         #Sandwich estimate from the large-sample fit, scaled to n
@

<<>>=
nvec<-c(expression(hat(beta)[0]),expression(hat(psi)[0]),
        expression(hat(alpha)[0]),expression(hat(alpha)[1]))
nvecp<-c(expression(hat(beta)[0]),expression(hat(psi)[0]),expression(hat(phi)[0]),
         expression(hat(alpha)[0]),expression(hat(alpha)[1]))
pairs(cbind(psr.ests,al.ests),labels=nvecp,pch=19,cex=0.5,main = "PSR")
print('Correlation matrix')
cor(cbind(psr.ests,al.ests))
pairs(cbind(g.ests,al.ests),labels=nvec,pch=19,cex=0.5,main = "G-est")
print('Correlation matrix')
cor(cbind(g.ests,al.ests))
pairs(cbind(ipw.ests,al.ests),labels=nvec,pch=19,cex=0.5,main = "IPW")
print('Correlation matrix')
cor(cbind(ipw.ests,al.ests))
@

Here the sandwich estimates match the empirical estimates well.

\end{document}