\documentclass[notitlepage]{article}
\usepackage{Math}
%\usepackage{../../Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bm}
\usepackage{bbm}
\usepackage{verbatim}
\usepackage{mathtools}
\usepackage{dsfont}
\usepackage{scalerel}
\usepackage{amsfonts,amsmath,amssymb}
\usepackage{tikz}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}
\usepackage{pgfplots}
\pgfplotsset{compat=1.17}
\newcommand*\circled[1]{\tikz[baseline=(char.base)]{
  \node[shape=circle,draw,inner sep=2pt] (char) {#1};}}
\usepackage{xcolor}
\newtheorem{alg}{Algorithm}
\newtheorem{ex}{Example}
\newtheorem{note}{Note}
\newenvironment{proof}{\par\noindent{\normalfont{\bf{Proof }}}}
  {\hfill \small$\blacksquare$\\[2mm]}
\setlength{\parindent}{0in}
\def\E{\mathbbmss{E}}
\def\Var{\text{Var}}
\def\Cov{\text{Cov}}
\def\Corr{\text{Corr}}
\def\bW{\mathbf{W}}
\def\bH{\mathbf{H}}
\newcommand{\bs}[1]{\bm{#1}}
\newcommand{\Rho}{\mathrm{P}}
\newcommand{\cif}{\text{ if }}
\newcommand{\Ra}{\Longrightarrow}
\def\bphi{\boldsymbol \phi}
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}
\def\Xb{\mathbf{X}_{\beta}}
\def\Xp{\mathbf{X}_{\psi}}
\def\beps{\boldsymbol{\epsilon}}
\def\betahat{\widehat \beta}
\def\psihat{\widehat \psi}
\begin{document}

<<>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=6)
options(repos=c(CRAN="https://cloud.r-project.org/"))
knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)})
@

\begin{center}
{\textsclarge{Project 3: Solutions}}
\end{center}

\tikzset{
  -Latex,auto,node distance =1 cm and 1 cm,semithick,
  state/.style ={circle, draw, minimum width = 0.7 cm},
  box/.style ={rectangle, draw, minimum width = 0.7 cm, fill=lightgray},
  point/.style = {circle, draw, inner sep=0.08cm,fill,node contents={}},
  bidirected/.style={Latex-Latex,dashed},
  el/.style = {inner sep=3pt, align=left, sloped}
}

The data-generating conditional mean model for the binary treatment setting is
\[
\E_{Y|X,Z}^\calO[Y|X=x,Z=z] = 1 + x + z + x z
\]
with confounder $X \sim Normal(0.5,0.5^2)$, and the conditional model for $Z|X=x$ is $Bernoulli(e(x))$ with
\[
e(x) = \frac{\exp\{-1.5 + 2 x + x^2\}}{1+\exp\{-1.5 + 2 x + x^2 \}}.
\]
In the analysis, the propensity score is modelled parametrically with parameter $\alpha$, which is estimated via logistic regression.
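Under this data-generating model the treatment effect is linear in the confounder: subtracting the two conditional means gives
\[
\E_{Y|X,Z}^\calO[Y|X=x,Z=1] - \E_{Y|X,Z}^\calO[Y|X=x,Z=0] = (1 + x + 1 + x) - (1 + x) = 1 + x,
\]
so the target values of the treatment-effect parameters estimated below are $\psi_0 = \psi_1 = 1$.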
<<>>=
#Calculation for large N
library(mvnfast)
set.seed(22087)
N<-1000000
muX<-0.5;sigX<-0.5
X1<-rnorm(N,muX,sigX)
al<-c(-1.5,2,1)
expit<-function(x){return(1/(1+exp(-x)))}
Xa<-cbind(1,X1,X1^2)
be<-c(1,1,1,1)
sigY<-1
ps.true<-expit(Xa %*% al)
Z<-rbinom(N,1,ps.true)
Xb<-cbind(1,X1,Z,Z*X1)
Y<-Xb %*% be + rnorm(N)*sigY
par(mar=c(4,4,2,0))
hist(ps.true,breaks=seq(0,1,by=0.02),main='True propensity score',xlab=expression(e(x)))
box()
@

<<>>=
#Correct model
coef(summary(lm(Y~X1+Z+X1:Z)))
#Incorrect model
coef(summary(lm(Y~Z)))
@

\bigskip

\textbf{G-estimation}

The \textbf{G-estimating} form is
\[
\sum_{i=1}^n \begin{pmatrix} 1 \\ (z_i-e(x_i;\widehat \alpha)) \\[6pt] (z_i-e(x_i;\widehat \alpha)) x_i \end{pmatrix} (y_i - \beta_0 - z_i \psi_0 - z_i x_i \psi_1) = \begin{pmatrix} 0 \\[6pt] 0 \\[6pt] 0\end{pmatrix}
\]
that is, using the estimating (score) function
\[
\bZ_1^\top (\by - \bZ_2 \theta) = \zerovec
\]
say, where $\theta = (\beta_0, \psi_0, \psi_1)^\top$ and
\[
\bZ_1 = \begin{pmatrix} 1 & z_1 - e(x_1;\widehat \alpha) & (z_1 - e(x_1;\widehat \alpha)) x_1\\[3pt]
1 & z_2 - e(x_2;\widehat \alpha) & (z_2 - e(x_2;\widehat \alpha)) x_2 \\[3pt]
\vdots & \vdots & \vdots \\[3pt]
1 & z_n - e(x_n;\widehat \alpha) & (z_n - e(x_n;\widehat \alpha)) x_n \end{pmatrix}
\qquad
\bZ_2 = \begin{pmatrix} 1 & z_1 & z_1 x_1 \\[3pt] 1 & z_2 & z_2 x_2\\[3pt] \vdots & \vdots & \vdots \\[3pt] 1 & z_n & z_n x_n \end{pmatrix}
\]
The solution is therefore
\[
\begin{pmatrix} \widehat \beta_0 \\[6pt] \widehat \psi_0 \\[6pt] \widehat \psi_1 \end{pmatrix} = (\bZ_1^\top \bZ_2)^{-1} \bZ_1^\top \by
\]

<<>>=
eX<-fitted(glm(Z~X1+I(X1^2),family=binomial))
Zm1<-cbind(1,Z-eX,(Z-eX)*X1)
Zm2<-cbind(1,Z,Z*X1)
g.ests<-solve(t(Zm1) %*% Zm2) %*% t(Zm1) %*% Y
g.ests
@

We can incorporate the estimation of the propensity score parameters into the same estimating equation formulation by adding in the estimating equation for $\alpha$, namely, for the logistic regression model, the equations
\[
\sum_{i=1}^n \bx_i^\top (z_i - e(x_i;\alpha) )= \zerovec.
\]
We can compute an estimate of the variance-covariance matrix using the theory of estimating equations. For the $p \times 1$ system of estimating equations
\[
\sum_{i=1}^n \bU_i(\theta) = \zerovec
\]
with $\E[\bU(\theta_0)] = \zerovec$, we have that
\[
\sqrt{n}(\widehat \theta_n - \theta_0) \CiD Normal_p(\zerovec,\bV)
\]
where
\[
\bV = \mathcal{J}^{-1} \mathcal{I} \mathcal{J}^{-\top}
\]
with
\[
\mathcal{I} = \E[ \bU(\theta_0) \bU(\theta_0)^\top] \qquad \qquad \mathcal{J} = \E[\dot{\bU}(\theta_0)]
\]
both $(p \times p)$ matrices, and
\[
\dot{\bU}(\theta_0) = -\dfrac{\partial \bU(\theta)}{\partial \theta^\top} \bigg |_{\theta = \theta_0}.
\]
We proceed by computing estimates, $\widehat I_n$ and $\widehat J_n$, of the two matrices based on the observed data, and then computing
\[
\widehat \bV = \widehat J_n^{-1} \widehat I_n \widehat J_n^{-\top}
\]
to estimate the asymptotic variance; this approach is known as \textit{sandwich} or \textit{robust} variance estimation.
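To make the sandwich construction concrete before the element-by-element calculations below, the following sketch assembles $\widehat I_n$, $\widehat J_n$ and $\widehat \bV$ with matrix operations. It reuses \texttt{Y}, \texttt{Z}, \texttt{X1}, \texttt{eX}, \texttt{g.ests} and \texttt{N} from the chunks above, treats the fitted propensity score as if it were known (the case (a) form of the estimating function), and introduces the names \texttt{G}, \texttt{U}, \texttt{D}, \texttt{I.mat}, \texttt{J.mat} and \texttt{V.sand} purely for illustration; it is not part of the solution calculations that follow.

<<>>=
# Sketch only: sandwich variance for the G-estimating function above,
# with the fitted propensity score eX treated as fixed.
eps.Y <- as.vector(Y - cbind(1, Z, Z*X1) %*% g.ests)  # outcome residual
eps.Z <- as.vector(Z - eX)                            # treatment residual
G <- cbind(1, eps.Z, eps.Z*X1)   # multiplier vector in the estimating function
U <- G * eps.Y                   # n x 3 matrix of estimating function values
D <- cbind(1, Z, Z*X1)           # -d/d(beta0,psi0,psi1) of eps.Y
I.mat <- crossprod(U)/N          # estimate of E[U U^T]
J.mat <- crossprod(G, D)/N       # estimate of E[-dU/dtheta^T]
V.sand <- solve(J.mat) %*% I.mat %*% t(solve(J.mat))
V.sand                           # asymptotic variance; divide by n for a given sample size
@

This mirrors the element-by-element construction of $\widehat I_n$ and $\widehat J_n$ used in the chunks below.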
\begin{enumerate}[(a)]
\item \textbf{\textit{Known propensity score:}} If
\[
\epsilon_Y = (Y - \beta_0 - Z \psi_0 - Z X \psi_1) \qquad \qquad \epsilon_Z = (Z-e(X;\alpha))
\]
then
\[
\bU(\theta) = \begin{pmatrix} (Y - \beta_0 - Z \psi_0 - Z X \psi_1) \\[6pt] (Z-e(X; \alpha))(Y - \beta_0 - Z \psi_0 - Z X \psi_1) \\[6pt] (Z-e(X; \alpha)) X (Y - \beta_0 - Z \psi_0 - Z X \psi_1) \end{pmatrix} = \begin{pmatrix} \epsilon_Y \\[6pt] \epsilon_Z \epsilon_Y \\[6pt] \epsilon_Z X \epsilon_Y \end{pmatrix}
\]
and the derivative is
\[
\dot{\bU}(\theta) = \begin{bmatrix} 1 & Z & ZX\\[6pt] \epsilon_Z & Z \epsilon_Z & ZX \epsilon_Z \\[6pt] X \epsilon_Z & Z X \epsilon_Z & ZX^2 \epsilon_Z \end{bmatrix}
\]
and the $\calI$ and $\calJ$ matrices are $3 \times 3$.

\item \textbf{\textit{Unknown propensity score:}} If $\bX = (1,X,X^2)$, the score vector is
\[
\bU(\theta) = \begin{pmatrix} (Y - \beta_0 - Z \psi_0 - Z X \psi_1) \\[6pt] (Z-e(X; \alpha))(Y - \beta_0 - Z \psi_0 - Z X \psi_1) \\[6pt] (Z-e(X; \alpha)) X (Y - \beta_0 - Z \psi_0 - Z X \psi_1) \\[6pt] \bX^\top (Z - e(X; \alpha)) \end{pmatrix} = \begin{pmatrix} \epsilon_Y \\[6pt] \epsilon_Z \epsilon_Y \\[6pt] \epsilon_Z X \epsilon_Y \\[6pt] \bX^\top \epsilon_Z \end{pmatrix}
\]
and writing $e \equiv e(X;\alpha)$, the derivative is
\[
\dot{\bU}(\theta) = \begin{bmatrix} 1 & Z & ZX & \zerovec\\[6pt] \epsilon_Z & Z \epsilon_Z & ZX \epsilon_Z & e (1-e) \bX \epsilon_Y \\[6pt] X \epsilon_Z & Z X \epsilon_Z & ZX^2 \epsilon_Z & e (1-e) X \bX \epsilon_Y\\[6pt] 0 & 0 & 0 & e (1-e) \bX^\top \bX \end{bmatrix}
\]
and the $\calI$ and $\calJ$ matrices are $6 \times 6$.
\end{enumerate}

<<>>=
#Large sample calculation
eX<-ps.true
Zm1<-cbind(1,Z-eX,X1*(Z-eX))
Zm2<-cbind(1,Z,Z*X1)
g.ests0<-solve(t(Zm1) %*% Zm2) %*% t(Zm1) %*% Y
#G-estimation variance estimate
res1<-Y-g.ests0[1]-g.ests0[2]*Z - g.ests0[3]*Z*X1
res2<-Z-eX
I.n<-matrix(0,3,3)
I.n[1,1]<-mean(res1^2)
I.n[1,2]<-I.n[2,1]<-mean(res1^2*res2)
I.n[1,3]<-I.n[3,1]<-mean(res1^2*res2*X1)
I.n[2,2]<-mean(res1^2*res2^2)
I.n[2,3]<-I.n[3,2]<-mean(res1^2*res2^2*X1)
I.n[3,3]<-mean(res1^2*res2^2*X1^2)
J.n<-matrix(0,3,3)
J.n[1,1]<-1
J.n[1,2]<-mean(Z)
J.n[1,3]<-mean(Z*X1)
J.n[2,1]<-mean(res2)
J.n[2,2]<-mean(Z*res2)
J.n[2,3]<-mean(Z*X1*res2)
J.n[3,1]<-mean(X1*res2)
J.n[3,2]<-mean(Z*X1*res2)
J.n[3,3]<-mean(Z*X1^2*res2)
V1<-solve(J.n) %*% (I.n %*% t(solve(J.n)))
###############################################################
fit.p<-glm(Z~X1+I(X1^2),family=binomial)
eX<-fitted(fit.p)
Zm1<-cbind(1,Z-eX,X1*(Z-eX))
Zm2<-cbind(1,Z,Z*X1)
g.ests1<-solve(t(Zm1) %*% Zm2) %*% t(Zm1) %*% Y
#G-estimation variance estimate
res1<-Y-g.ests1[1]-g.ests1[2]*Z - g.ests1[3]*Z*X1
res2<-Z-eX
I.n<-matrix(0,6,6)
I.n[1,1]<-mean(res1^2)
I.n[1,2]<-I.n[2,1]<-mean(res1^2*res2)
I.n[1,3]<-I.n[3,1]<-mean(res1^2*res2*X1)
I.n[1,4]<-I.n[4,1]<-mean(res1*res2)
I.n[1,5]<-I.n[5,1]<-mean(res1*res2*X1)
I.n[1,6]<-I.n[6,1]<-mean(res1*res2*X1^2)
I.n[2,2]<-mean(res1^2*res2^2)
I.n[2,3]<-I.n[3,2]<-mean(res1^2*res2^2*X1)
I.n[2,4]<-I.n[4,2]<-mean(res1*res2^2)
I.n[2,5]<-I.n[5,2]<-mean(res1*res2^2*X1)
I.n[2,6]<-I.n[6,2]<-mean(res1*res2^2*X1^2)
I.n[3,3]<-mean(res1^2*res2^2*X1^2)
I.n[3,4]<-I.n[4,3]<-mean(res1*res2^2*X1)
I.n[3,5]<-I.n[5,3]<-mean(res1*res2^2*X1^2)
I.n[3,6]<-I.n[6,3]<-mean(res1*res2^2*X1^3)
I.n[4,4]<-mean(res2^2)
I.n[4,5]<-I.n[5,4]<-mean(X1*res2^2)
I.n[4,6]<-I.n[6,4]<-mean(X1^2*res2^2)
I.n[5,5]<-mean(X1^2*res2^2)
I.n[5,6]<-I.n[6,5]<-mean(X1^3*res2^2)
I.n[6,6]<-mean(X1^4*res2^2)
J.n<-matrix(0,6,6)
vX<-eX*(1-eX)
J.n[1,1]<-1
J.n[1,2]<-mean(Z)
J.n[1,3]<-mean(Z*X1)
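# Parameter ordering for the 6x6 matrices: (beta0, psi0, psi1, alpha0, alpha1, alpha2).
# Rows 4-6 of J.n keep their zero entries in columns 1-3 because the logistic score
# for alpha does not involve (beta0, psi0, psi1); similarly J.n[1,4:6] stays zero
# because eps_Y does not depend on alpha.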
J.n[2,1]<-mean(res2)
J.n[2,2]<-mean(Z*res2)
J.n[2,3]<-mean(Z*X1*res2)
J.n[2,4]<-mean(vX*res1)
J.n[2,5]<-mean(vX*res1*X1)
J.n[2,6]<-mean(vX*res1*X1^2)
J.n[3,1]<-mean(X1*res2)
J.n[3,2]<-mean(Z*X1*res2)
J.n[3,3]<-mean(Z*X1^2*res2)
J.n[3,4]<-mean(vX*X1*res1)
J.n[3,5]<-mean(vX*res1*X1^2)
J.n[3,6]<-mean(vX*res1*X1^3)
J.n[4,4]<-mean(vX)
J.n[4,5]<-J.n[5,4]<-mean(X1*vX)
J.n[4,6]<-J.n[6,4]<-mean(X1^2*vX)
J.n[5,5]<-mean(X1^2*vX)
J.n[5,6]<-J.n[6,5]<-mean(X1^3*vX)
J.n[6,6]<-mean(X1^4*vX)
V2<-solve(J.n) %*% (I.n %*% t(solve(J.n)))
@

<<>>=
#Monte Carlo study
nreps<-20000
n<-1000
psr.ests<-matrix(0,nrow=nreps,ncol=3)
g.ests0<-g.ests1<-al.ests<-matrix(0,nrow=nreps,ncol=3)
V1.ests<-array(0,c(nreps,3,3))
V2.ests<-array(0,c(nreps,6,6))
for(irep in 1:nreps){
  X1<-rnorm(n,muX,sigX)
  Xa<-cbind(1,X1,X1^2)
  ps.true<-expit(Xa %*% al)
  Z<-rbinom(n,1,ps.true)
  Xb<-cbind(1,X1,Z,X1*Z)
  Y<-Xb %*% be + rnorm(n)*sigY
  eX<-ps.true
  Zm1<-cbind(1,Z-eX,X1*(Z-eX))
  Zm2<-cbind(1,Z,Z*X1)
  g.ests0[irep,]<-solve(t(Zm1) %*% Zm2) %*% t(Zm1) %*% Y
  #G-estimation variance estimate
  res1<-Y-g.ests0[irep,1]-g.ests0[irep,2]*Z - g.ests0[irep,3]*Z*X1
  res2<-Z-eX
  I.n<-matrix(0,3,3)
  I.n[1,1]<-mean(res1^2)
  I.n[1,2]<-I.n[2,1]<-mean(res1^2*res2)
  I.n[1,3]<-I.n[3,1]<-mean(res1^2*res2*X1)
  I.n[2,2]<-mean(res1^2*res2^2)
  I.n[2,3]<-I.n[3,2]<-mean(res1^2*res2^2*X1)
  I.n[3,3]<-mean(res1^2*res2^2*X1^2)
  J.n<-matrix(0,3,3)
  J.n[1,1]<-1
  J.n[1,2]<-mean(Z)
  J.n[1,3]<-mean(Z*X1)
  J.n[2,1]<-mean(res2)
  J.n[2,2]<-mean(Z*res2)
  J.n[2,3]<-mean(Z*X1*res2)
  J.n[3,1]<-mean(X1*res2)
  J.n[3,2]<-mean(Z*X1*res2)
  J.n[3,3]<-mean(Z*X1^2*res2)
  V<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
  V1.ests[irep,,]<-V
  ###############################################################
  fit.p<-glm(Z~X1+I(X1^2),family=binomial)
  eX<-fitted(fit.p)
  al.ests[irep,]<-coef(fit.p)
  Zm1<-cbind(1,Z-eX,X1*(Z-eX))
  Zm2<-cbind(1,Z,Z*X1)
  g.ests1[irep,]<-solve(t(Zm1) %*% Zm2) %*% t(Zm1) %*% Y
  #G-estimation variance estimate
  res1<-Y-g.ests1[irep,1]-g.ests1[irep,2]*Z - g.ests1[irep,3]*Z*X1
  res2<-Z-eX
  I.n<-matrix(0,6,6)
  I.n[1,1]<-mean(res1^2)
  I.n[1,2]<-I.n[2,1]<-mean(res1^2*res2)
  I.n[1,3]<-I.n[3,1]<-mean(res1^2*res2*X1)
  I.n[1,4]<-I.n[4,1]<-mean(res1*res2)
  I.n[1,5]<-I.n[5,1]<-mean(res1*res2*X1)
  I.n[1,6]<-I.n[6,1]<-mean(res1*res2*X1^2)
  I.n[2,2]<-mean(res1^2*res2^2)
  I.n[2,3]<-I.n[3,2]<-mean(res1^2*res2^2*X1)
  I.n[2,4]<-I.n[4,2]<-mean(res1*res2^2)
  I.n[2,5]<-I.n[5,2]<-mean(res1*res2^2*X1)
  I.n[2,6]<-I.n[6,2]<-mean(res1*res2^2*X1^2)
  I.n[3,3]<-mean(res1^2*res2^2*X1^2)
  I.n[3,4]<-I.n[4,3]<-mean(res1*res2^2*X1)
  I.n[3,5]<-I.n[5,3]<-mean(res1*res2^2*X1^2)
  I.n[3,6]<-I.n[6,3]<-mean(res1*res2^2*X1^3)
  I.n[4,4]<-mean(res2^2)
  I.n[4,5]<-I.n[5,4]<-mean(X1*res2^2)
  I.n[4,6]<-I.n[6,4]<-mean(X1^2*res2^2)
  I.n[5,5]<-mean(X1^2*res2^2)
  I.n[5,6]<-I.n[6,5]<-mean(X1^3*res2^2)
  I.n[6,6]<-mean(X1^4*res2^2)
  J.n<-matrix(0,6,6)
  vX<-eX*(1-eX)
  J.n[1,1]<-1
  J.n[1,2]<-mean(Z)
  J.n[1,3]<-mean(Z*X1)
  J.n[2,1]<-mean(res2)
  J.n[2,2]<-mean(Z*res2)
  J.n[2,3]<-mean(Z*X1*res2)
  J.n[2,4]<-mean(vX*res1)
  J.n[2,5]<-mean(vX*res1*X1)
  J.n[2,6]<-mean(vX*res1*X1^2)
  J.n[3,1]<-mean(X1*res2)
  J.n[3,2]<-mean(Z*X1*res2)
  J.n[3,3]<-mean(Z*X1^2*res2)
  J.n[3,4]<-mean(vX*X1*res1)
  J.n[3,5]<-mean(vX*res1*X1^2)
  J.n[3,6]<-mean(vX*res1*X1^3)
  J.n[4,4]<-mean(vX)
  J.n[4,5]<-J.n[5,4]<-mean(X1*vX)
  J.n[4,6]<-J.n[6,4]<-mean(X1^2*vX)
  J.n[5,5]<-mean(X1^2*vX)
  J.n[5,6]<-J.n[6,5]<-mean(X1^3*vX)
  J.n[6,6]<-mean(X1^4*vX)
  V<-solve(J.n) %*% (I.n %*% t(solve(J.n)))/n
  V2.ests[irep,,]<-V
}
@

<<>>=
par(mar=c(4,4,2,0))
psi.ests<-cbind(g.ests0[,2:3],g.ests1[,2:3])
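# Columns 1-2: psi estimates using the true propensity score; columns 3-4: using the
# estimated propensity score. The dashed line in the boxplot marks the true value
# psi0 = psi1 = 1.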
pn<-c(expression(hat(psi)[0]),expression(hat(psi)[1]),expression(hat(psi)[0]),expression(hat(psi)[1]))
colnames(psi.ests)<-c('Psi0-0','Psi1-0','Psi0-1','Psi1-1')
boxplot(psi.ests,names=pn,pch=19,cex=0.5)
abline(h=1,col='red',lty=2)
@

<<>>=
#Variances
apply(psi.ests,2,var)
@

<<>>=
#Case (a): known propensity score
round(apply(V1.ests,2:3,mean),6)[2:3,2:3]     #Average sandwich estimate over Monte Carlo replications
round(cov(cbind(g.ests0,al.ests)),6)[2:3,2:3] #Empirical (Monte Carlo) variance of the estimates
round(V1/n,6)[2:3,2:3]                        #Sandwich estimate from the large-sample calculation, scaled to n
#Case (b): estimated propensity score
round(apply(V2.ests,2:3,mean),6)[2:3,2:3]     #Average sandwich estimate over Monte Carlo replications
round(cov(cbind(g.ests1,al.ests)),6)[2:3,2:3] #Empirical (Monte Carlo) variance of the estimates
round(V2/n,6)[2:3,2:3]                        #Sandwich estimate from the large-sample calculation, scaled to n
@

\end{document}