\documentclass[notitlepage]{article} \usepackage{Math} %\usepackage{../../Math598} \usepackage{listings} \usepackage{numprint} \usepackage{enumerate} \usepackage{bm} \usepackage{bbm} \usepackage{verbatim} \usepackage{mathtools} \usepackage{scalerel} \usepackage{dsfont} \usepackage{amsfonts,amsmath,amssymb} \usepackage{tikz} \usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning} \usepackage{pgfplots} \pgfplotsset{compat=1.18} \newcommand*\circled[1]{\tikz[baseline=(char.base)]{ \node[shape=circle,draw,inner sep=2pt] (char) {#1};}} \usepackage{xcolor} \newtheorem{alg}{Algorithm} \newtheorem{ex}{Example} \newtheorem{note}{Note} \newenvironment{proof}{\par\noindent{\normalfont{\bf{Proof }}}} {\hfill \small$\blacksquare$\\[2mm]} \setlength{\parindent}{0in} \def\E{\mathbbmss{E}} \def\Var{\text{Var}} \def\Cov{\text{Cov}} \def\Corr{\text{Corr}} \def\bW{\mathbf{W}} \def\bH{\mathbf{H}} \newcommand{\bs}[1]{\bm{#1}} \newcommand{\Rho}{\mathrm{P}} \newcommand{\cif}{\text{ if }} \newcommand{\Ra}{\Longrightarrow} \def\bphi{\boldsymbol \phi} \lstloadlanguages{R} \definecolor{keywordcolor}{rgb}{0,0.6,0.6} \definecolor{delimcolor}{rgb}{0.461,0.039,0.102} \definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432} \lstdefinestyle{Rsettings}{ basicstyle=\ttfamily, breaklines=true, showstringspaces=false, keywords={if, else, function, theFunction, tmp}, % Write as many keywords otherkeywords={}, commentstyle=\itshape\color{Rcommentcolor}, keywordstyle=\color{keywordcolor}, moredelim=[s][\color{delimcolor}]{"}{"}, } \lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}} \def\Xb{\mathbf{X}_{\beta}} \def\Xp{\mathbf{X}_{\psi}} \def\beps{\boldsymbol{\epsilon}} \def\betahat{\widehat \beta} \def\psihat{\widehat \psi} \begin{document} <>= library(knitr) # global chunk options opts_chunk$set(cache=FALSE, autodep=TRUE) options(scipen=6) options(repos=c(CRAN="https://cloud.r-project.org/")) knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)}) @ \begin{center} {\textsclarge{Inverse Probability Weighting with a Continuous Exposure}} \end{center} \tikzset{ -Latex,auto,node distance =1 cm and 1 cm,semithick, state/.style ={circle, draw, minimum width = 0.7 cm}, box/.style ={rectangle, draw, minimum width = 0.7 cm, fill=lightgray}, point/.style = {circle, draw, inner sep=0.08cm,fill,node contents={}}, bidirected/.style={Latex-Latex,dashed}, el/.style = {inner sep=3pt, align=left, sloped} } The standard approach to inverse probability weighting (IPW) estimation for a continuous treatment is based on the same kind of inverse weight as in the binary treatment case, specifically \[ w(x,z) = \frac{1}{f_{Z|X}(z|x)} \] to define IPW estimators \[ \widetilde \mu_{\text{IPW}} (z) = \dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{\Ind_{\{z\}}(Z_i) Y_i}{f_{Z|X}^\calO (Z_i|X_i)} \qquad \text{and} \qquad \widehat \mu_{\text{IPW}} (z) = \sum\limits_{i=1}^n W_{iz} Y_i \] with \[ W_{iz} = \dfrac{\dfrac{\Ind_{\{z\}}(Z_i)}{f_{Z|X}^\calO (Z_i|X_i)}}{\sum\limits_{j=1}^n \dfrac{\Ind_{z}(Z_j)}{f_{Z|X}^\calO (Z_j|X_j)}} \qquad \qquad \] The challenge with implementing this estimator is that the indicator function $\Ind_{\{z\}}(z_i)$ only picks off the observed treatments $z_i$ that \textbf{exactly} match the target level $z$. This can be overcome by taking a small window around the target level, $(z-d,z+d)$ say. The weight is thus modified to be \[ w(x,z) = \frac{\Ind_{(z-d,z+d)}(Z_i)}{f_{Z|X}(z|x)} \] which requires a modification to the $\widetilde \mu_{\text{IPW}} (z)$ estimator, namely \[ \widetilde \mu_{\text{IPW}} (z) = \dfrac{1}{2dn} \sum\limits_{i=1}^n \dfrac{\Ind_{(z-d,z+d)}(Z_i) Y_i}{f_{Z|X}^\calO (Z_i|X_i)} \] \bigskip \textbf{EXAMPLE:} In this example, we have a data generating process with $k=10$ predictors, joint Normally distributed $X \sim Normal(\bm{\mu},\Sigma)$, with treatment model given by \[ Z | X = x \sim Normal(\bx_\alpha \alpha, \sigma_Z^2) \] and outcome model \[ Y | X = x, Z=z \sim Normal(\mu(x,z), \sigma_Y^2) \] with \[ \mu(x,z) = \bx_0 \beta + \psi_0 z = \beta_0 + \sum_{l=1}^k x_l \beta_l + \psi_0 z. \] In the model, all predictors are predictors of $Z$, but only first three components are confounders. In the analysis \[ \alpha = (4,1,1,1,1,1,1,-2,-2,2)^\top \qquad \beta = (2,-2,2.2,3.6,0,0,0,0,0,0)^\top \] and $\psi_0 = 1$. <>= #Set-up set.seed(865177) library(mvnfast) n<-10000 k<-10 Mu<-sample(-5:5,size=k,rep=T)/2 Sigma<-0.1*0.8^abs(outer(1:k,1:k,'-')) al<-c(4,rep(1,6),rep(-2,4)) be<-rep(0,k+2) be[1:5]<-c(2,1,-2.0,2.2,3.6) sigZ<-5;sigY<-1 #Exploratory sample X<-rmvn(n,mu=Mu,Sigma) Xal<-cbind(1,X) ps.true<-Xal %*% al Z<-rnorm(n,ps.true,sigZ) #Find a plotting grid Z0<-round(mean(Z),0) zgrid<-seq(Z0-5,Z0+5,by=0.1) nZ<-length(zgrid) #Intercept intercept.val<-be[1]+sum(Mu*be[-c(1:2)]) Xm<-cbind(1,Z,X) muval<-as.vector(Xm %*% be) Y<-rnorm(n,muval,sigY) @ The quantity $\psi_0$ measures the unconfounded effect of treatment, as, marginally, \[ \E[Y(z)] = \psi_0 z + \mu_0 \beta. \] where $\mu_0 = (1,\bm{\mu}^\top)$ so that the intercept is $\mu_0 \beta = \Sexpr{intercept.val}$. Here, $\sigma_Z = 5$ and $\sigma_Y = 1$. We carry out a simulation study of 200 replicates with $n=1000$. <>= #Monte Carlo study n<-1000 nreps<-200 d<-0.05 zhat.samp<-ztilde.samp<-matrix(0,nrow=nreps,ncol=nZ) for(irep in 1:nreps){ X<-rmvn(n,mu=Mu,Sigma) Xal<-cbind(1,X) ps.true<-Xal %*% al Z<-rnorm(n,ps.true,sigZ) Xm<-cbind(1,Z,X) muval<-as.vector(Xm %*% be) Y<-rnorm(n,muval,sigY) fden<-lm(Z~X) den<-dnorm(Z,predict(fden),sd(fden$residuals)) zhat<-ztilde<-rep(0,nZ) for(ix in 1:nZ){ zl<-zgrid[ix]-d zu<-zgrid[ix]+d w<-(Z > zl & Z < zu)/den zhat[ix]<-sum(w*Y,na.rm=T)/sum(w,na.rm=T) ztilde[ix]<-mean(w*Y,na.rm=T)/(2*d) } zhat.samp[irep,]<-zhat ztilde.samp[irep,]<-ztilde } @ First we look at the performance of $\widetilde \mu_{\text{IPW}} (z)$: <>= par(mar=c(4,5,3,0)) ztilde.summary<-apply(ztilde.samp,2,quantile,prob=c(0.025,0.50,0.975),na.rm=TRUE) plot(zgrid,ztilde.summary[2,],type='l',xlab='z', ylab=expression(paste(tilde(E),"[Y(z)]")),col='red') lines(zgrid,ztilde.summary[1,],lty=2,col='red') lines(zgrid,ztilde.summary[3,],lty=2,col='red') lines(zgrid,intercept.val+be[2]*zgrid) legend(zgrid[1],28,c(expression(paste(tilde(E),"[Y(z)]")),expression(paste(E,"[Y(z)]"))), col=c('red','black'),lty=1) title(expression(paste(tilde(E),"[Y(z)]"))) fit.true<-lm(ztilde.summary[2,]~zgrid) coef(summary(fit.true)) @ The estimator is quite noisy; in the plot, the Monte Carlo confidence intervals are very wide. Next we look at the performance of $\widehat \mu_{\text{IPW}} (z)$: <>= par(mar=c(4,5,3,0)) zhat.summary<-apply(zhat.samp,2,quantile,prob=c(0.025,0.50,0.975),na.rm=TRUE) plot(zgrid,zhat.summary[2,],type='l',xlab='z', ylab=expression(paste(hat(E),"[Y(z)]")),col='red') lines(zgrid,zhat.summary[1,],lty=2,col='red') lines(zgrid,zhat.summary[3,],lty=2,col='red') legend(zgrid[1],28,c(expression(paste(hat(E),"[Y(z)]")),expression(paste(E,"[Y(z)]"))), col=c('red','black'),lty=1) title(expression(paste(hat(E),"[Y(z)]"))) lines(zgrid,intercept.val+be[2]*zgrid) fit.true<-lm(zhat.summary[2,]~zgrid) coef(summary(fit.true)) @ The performance is much better when the weights are standardized, and the confidence intervals (indicated by dotted lines) are much shorter. \bigskip \textbf{Stabilized Weighting:} It is also sometimes recommended to use the `stabilized' weight based on the (estimated) marginal distribution $f_Z(z)$, that is \[ w(x,z) = \frac{f_Z(z)}{f_{Z|X}(z|x)}. \] <>= #Monte Carlo study n<-1000 nreps<-200 d<-0.05 zstab.samp<-matrix(0,nrow=nreps,ncol=nZ) for(irep in 1:nreps){ X<-rmvn(n,mu=Mu,Sigma) Xal<-cbind(1,X) ps.true<-Xal %*% al Z<-rnorm(n,ps.true,sigZ) Xm<-cbind(1,Z,X) muval<-as.vector(Xm %*% be) Y<-rnorm(n,muval,sigY) fnum<-lm(Z~1) fden<-lm(Z~X) zstab<-rep(0,nZ) ratio<-dnorm(Z,predict(fnum),sd(fnum$residuals))/dnorm(Z,predict(fden),sd(fden$residuals)) for(ix in 1:nZ){ zl<-zgrid[ix]-d zu<-zgrid[ix]+d w<-(Z > zl & Z < zu)*ratio zstab[ix]<-sum(w*Y,na.rm=T)/sum(w,na.rm=T) } zstab.samp[irep,]<-zstab } @ We look at the performance of $\widehat \mu_{\text{IPW}} (z)$ with the stabilized weight: <>= par(mar=c(4,5,2,0)) zstab.summary<-apply(zstab.samp,2,quantile,prob=c(0.025,0.50,0.975),na.rm=TRUE) plot(zgrid,zstab.summary[2,],type='l',xlab='z', ylab=expression(paste(hat(E),"[Y(z)]")),col='red') lines(zgrid,zstab.summary[1,],lty=2,col='red') lines(zgrid,zstab.summary[3,],lty=2,col='red') legend(zgrid[1],28,c(expression(paste(hat(E),"[Y(z)]")),expression(paste(E,"[Y(z)]"))), col=c('red','black'),lty=1) title(expression(paste(hat(E),"[Y(z)]",' with stabilized weight'))) lines(zgrid,intercept.val+be[2]*zgrid) fit.true<-lm(zstab.summary[2,]~zgrid) coef(summary(fit.true)) @ \bigskip \textbf{EXAMPLE:} The methods also work for quadratic data generating models \[ \mu(x,z) = \bx_0 \beta + \psi_0 z + \psi_1 z^2 \] <>= #Monte Carlo study zhat.samp<-zstab.samp<-matrix(0,nrow=nreps,ncol=nZ) for(irep in 1:nreps){ X<-rmvn(n,mu=Mu,Sigma) Xal<-cbind(1,X) ps.true<-Xal %*% al Z<-rnorm(n,ps.true,sigZ) Xm<-cbind(1,Z,X) muval<-as.vector(Xm %*% be) Y<-rnorm(n,muval+0.1*(Z-Z0)^2,sigY) fnum<-lm(Z~1) fden<-lm(Z~X) den<-dnorm(Z,predict(fden),sd(fden$residuals)) ratio<-dnorm(Z,predict(fnum),sd(fnum$residuals))/den zhat<-zstab<-rep(0,nZ) for(ix in 1:nZ){ zl<-zgrid[ix]-d zu<-zgrid[ix]+d w0<-(Z > zl & Z < zu)/den w1<-(Z > zl & Z < zu)*ratio zhat[ix]<-sum(w0*Y,na.rm=T)/sum(w0,na.rm=T) zstab[ix]<-sum(w1*Y,na.rm=T)/sum(w1,na.rm=T) } zhat.samp[irep,]<-zhat zstab.samp[irep,]<-zstab } @ We look at the performance of $\widehat \mu_{\text{IPW}} (z)$ with the stabilized weight: <>= par(mar=c(4,5,2,0)) zhat.summary<-apply(zhat.samp,2,quantile,prob=c(0.025,0.50,0.975),na.rm=TRUE) zstab.summary<-apply(zstab.samp,2,quantile,prob=c(0.025,0.50,0.975),na.rm=TRUE) plot(zgrid,zhat.summary[2,],type='l',xlab='z',ylab=expression(paste(hat(E),"[Y(z)]")),col='red') lines(zgrid,zhat.summary[1,],lty=2,col='red') lines(zgrid,zhat.summary[3,],lty=2,col='red') lines(zgrid,zstab.summary[2,],col='blue') lines(zgrid,intercept.val+be[2]*zgrid+0.1*(zgrid-Z0)^2) legend(zgrid[1],max(zhat.summary[2,]),c('Truth','Standardized','Stabilized'), col=c('black','red','blue'),lty=1) @ \pagebreak \textbf{Weighted Least Squares:} In weighted least squares (WLS), estimation is carried out according to a proposed mean model, with minimization of a weighted least squares objective function. For example, the mean model $m(x,z) \equiv m(z) = \beta_0 + \psi_0 z$ might be fitted according to the WLS objective function \[ \sum_{i=1}^n w(x_i,z_i) (y_i - \beta_0 - \psi_0 z_i )^2 \] which can also be carried out using the \texttt{lm} function. <>= #Monte Carlo study n<-1000 nreps<-200 d<-0.05 zstab.samp<-matrix(0,nrow=nreps,ncol=nZ) for(irep in 1:nreps){ X<-rmvn(n,mu=Mu,Sigma) Xal<-cbind(1,X) ps.true<-Xal %*% al Z<-rnorm(n,ps.true,sigZ) Xm<-cbind(1,Z,X) muval<-as.vector(Xm %*% be) Y<-rnorm(n,muval,sigY) fnum<-lm(Z~1) fden<-lm(Z~X) ratio<-dnorm(Z,predict(fnum),sd(fnum$residuals))/dnorm(Z,predict(fden),sd(fden$residuals)) fitw<-lm(Y~Z,weight=ratio) zstab.samp[irep,]<-predict(fitw,newdata=data.frame(Z=zgrid)) } @ We look at the performance of $\widehat \mu_{\text{IPW}} (z)$ with the stabilized weight: <>= par(mar=c(4,5,2,0)) zstab.summary<-apply(zstab.samp,2,quantile,prob=c(0.025,0.50,0.975),na.rm=TRUE) plot(zgrid,zstab.summary[2,],type='l',xlab='z',ylab=expression(paste(hat(E),"[Y(z)]")),col='red') lines(zgrid,zstab.summary[1,],lty=2,col='red') lines(zgrid,zstab.summary[3,],lty=2,col='red') legend(zgrid[1],max(zstab.summary[2,]),c('Truth','Stabilized'),col=c('black','red'),lty=1) lines(zgrid,intercept.val+be[2]*zgrid,col='black') title('Linear case') @ <>= #Monte Carlo study n<-1000 nreps<-200 d<-0.05 zhat.samp<-zstab.samp<-matrix(0,nrow=nreps,ncol=nZ) for(irep in 1:nreps){ X<-rmvn(n,mu=Mu,Sigma) Xal<-cbind(1,X) ps.true<-Xal %*% al Z<-rnorm(n,ps.true,sigZ) Xm<-cbind(1,Z,X) muval<-as.vector(Xm %*% be) Y<-rnorm(n,muval+0.1*(Z-Z0)^2,sigY) fnum<-lm(Z~1) fden<-lm(Z~X) den<-dnorm(Z,predict(fden),sd(fden$residuals)) ratio<-dnorm(Z,predict(fnum),sd(fnum$residuals))/den fitw<-lm(Y~Z+I(Z^2),weight=ratio) zstab.samp[irep,]<-predict(fitw,newdata=data.frame(Z=zgrid)) } @ We look at the performance of $\widehat \mu_{\text{IPW}} (z)$ with the stabilized weight: <>= par(mar=c(4,5,2,0)) zstab.summary<-apply(zstab.samp,2,quantile,prob=c(0.025,0.50,0.975),na.rm=TRUE) plot(zgrid,zstab.summary[2,],type='l',xlab='z',ylab=expression(paste(hat(E),"[Y(z)]")),col='red') lines(zgrid,zstab.summary[1,],lty=2,col='red') lines(zgrid,zstab.summary[3,],lty=2,col='red') lines(zgrid,intercept.val+be[2]*zgrid+0.1*(zgrid-Z0)^2) legend(zgrid[1],max(zstab.summary[2,]),c('Truth','Stabilized'), col=c('black','red'),lty=1) title('Quadratic case') @ \end{document}