\documentclass[notitlepage]{article}
\usepackage{Math}
%\usepackage{../../Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bm}
\usepackage{bbm}
\usepackage{verbatim}
\usepackage{mathtools}
\usepackage{dsfont}
\usepackage{amsfonts,amsmath,amssymb}
\usepackage{tikz}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}
\usepackage{pgfplots}
\pgfplotsset{compat=1.17}
\newcommand*\circled[1]{\tikz[baseline=(char.base)]{
  \node[shape=circle,draw,inner sep=2pt] (char) {#1};}}
\usepackage{xcolor}
\newtheorem{alg}{Algorithm}
\newtheorem{ex}{Example}
\newtheorem{note}{Note}
\newenvironment{proof}{\par\noindent{\normalfont{\bf{Proof }}}}
{\hfill \small$\blacksquare$\\[2mm]}
\setlength{\parindent}{0in}

\def\E{\mathbbmss{E}}
\def\Var{\text{Var}}
\def\Cov{\text{Cov}}
\def\Corr{\text{Corr}}
\def\bW{\mathbf{W}}
\def\bH{\mathbf{H}}
\newcommand{\bs}[1]{\bm{#1}}
\newcommand{\Rho}{\mathrm{P}}
\newcommand{\cif}{\text{ if }}
\newcommand{\Ra}{\Longrightarrow}
\def\bphi{\boldsymbol \phi}

\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\def\Xb{\mathbf{X}_{\beta}}
\def\Xp{\mathbf{X}_{\psi}}
\def\beps{\boldsymbol{\epsilon}}
\def\betahat{\widehat \beta}
\def\psihat{\widehat \psi}

\begin{document}

<<>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=6)
options(repos=c(CRAN="https://cloud.r-project.org/"))
@

\begin{center}
{\textsclarge{Inverse Probability Weighting}}
\end{center}

\tikzset{
  -Latex,auto,node distance =1 cm and 1 cm,semithick,
  state/.style ={circle, draw, minimum width = 0.7 cm},
  box/.style ={rectangle, draw, minimum width = 0.7 cm, fill=lightgray},
  point/.style = {circle, draw, inner sep=0.08cm,fill,node contents={}},
  bidirected/.style={Latex-Latex,dashed},
  el/.style = {inner sep=3pt, align=left, sloped}
}

The propensity score for a binary exposure, denoted $e(X)$, is defined via the observational conditional model for $Z$ given confounders $X$ as
\[
e(x) = f_{Z|X}^\calO (1|x) = {\Pr}_{Z|X}^\calO[Z=1|X=x]
\]
with $e(X)$ the corresponding random variable. In the binary treatment case, the importance sampling results yield
\[
\mu(0) = \dfrac{\E_{X,Y,Z}^\calO \left[ \dfrac{(1-Z) Y}{1-e(X)} \right]}{\E_{X,Y,Z}^\calO \left[ \dfrac{(1-Z)}{1-e(X)} \right]} \qquad \qquad
\mu(1) = \dfrac{\E_{X,Y,Z}^\calO \left[ \dfrac{Z Y}{e(X)} \right]}{\E_{X,Y,Z}^\calO \left[ \dfrac{Z}{e(X)} \right]}
\]

\bigskip

Replacing each expectation by its sample average, we deduce the (normalized) estimators
\[
\widehat \mu_{IPW} (0) = \dfrac{\dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{(1-Z_i) Y_i}{1-e(X_i)} }{\dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{(1-Z_i)}{1-e(X_i)}} \qquad \qquad
\widehat \mu_{IPW}(1) = \dfrac{\dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{Z_i Y_i}{e(X_i)}}{\dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{Z_i}{e(X_i)}}.
\]
Alternative (unnormalized) estimators are
\[
\widetilde \mu_{IPW} (0) = \dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{(1-Z_i) Y_i}{1-e(X_i)} \qquad \qquad
\widetilde \mu_{IPW}(1) = \dfrac{1}{n} \sum\limits_{i=1}^n \dfrac{Z_i Y_i}{e(X_i)}.
\]
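The two sets of estimators differ only in their denominators: by iterated expectation the weights average to one, since
\[
\E_{X,Z}^\calO \left[ \dfrac{Z}{e(X)} \right] = \E_{X}^\calO \left[ \dfrac{\E_{Z|X}^\calO [Z \,|\, X]}{e(X)} \right] = \E_{X}^\calO \left[ \dfrac{e(X)}{e(X)} \right] = 1
\]
and similarly $\E_{X,Z}^\calO \left[ (1-Z)/(1-e(X)) \right] = 1$. Thus $\widetilde \mu_{IPW}$ replaces the random denominator of $\widehat \mu_{IPW}$ by its expectation; the normalized version, whose weights sum to one by construction, is typically more stable when some of the $e(X_i)$ are close to zero or one.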
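In the numerical illustrations that follow, the weights are computed from the true propensity score of the data-generating mechanism. In practice $e(X)$ is unknown and must itself be estimated, most commonly by logistic regression. The following chunk is a minimal sketch of that step, assuming data \texttt{X}, \texttt{Z}, \texttt{Y} as simulated in the next chunk; it is not evaluated here, and the object names (\texttt{ps.fit}, \texttt{eX.hat}, \texttt{w.hat}) are illustrative.

<<eval=FALSE>>=
#Sketch: estimate e(X) by logistic regression, then form the weights
ps.fit<-glm(Z~X,family=binomial)        #propensity score model
eX.hat<-fitted(ps.fit)                  #estimated e(X_i)
w.hat<-(1-Z)/(1-eX.hat)+Z/eX.hat        #estimated IPW weights
coef(summary(lm(Y~Z,weights=w.hat)))    #IPW estimate of the ATE
@

Only the weight construction changes; each of the estimators below can then be computed with \texttt{w.hat} in place of \texttt{w}.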
<<>>=
#Data simulation
library(mvnfast)
set.seed(23987)
n<-5000
k<-10
Mu<-sample(-5:5,size=k,rep=T)/2
Sigma<-0.1*0.8^abs(outer(1:k,1:k,'-'))
X<-rmvn(n,mu=Mu,Sigma)                   #confounders
al<-c(-4,rep(1,6),rep(-2,4))             #propensity score coefficients
Xal<-cbind(1,X)
expit<-function(x){return(1/(1+exp(-x)))}
ps.true<-expit(Xal %*% al)               #true propensity score
#hist(ps.true)
Z<-rbinom(n,1,ps.true)                   #treatment assignment
table(Z)
be<-rep(0,k+2)
be[1:5]<-c(2,1,-2.0,2.2,3.6)             #outcome coefficients; true ATE is be[2]=1
Xm<-cbind(1,Z,X)
muval<-as.vector(Xm %*% be)
sig<-2
Y<-rnorm(n,muval,sig)                    #outcome
eX<-ps.true
w<-(1-Z)/(1-eX)+Z/eX                     #IPW weights using the true e(X)
@

We may perform the calculation directly:

<<>>=
w0<-(1-Z)/(1-eX)     #weights for the untreated
W0<-w0/sum(w0)       #normalized weights
w1<-Z/eX             #weights for the treated
W1<-w1/sum(w1)
#Mu tilde: unnormalized estimator of the ATE
mean(w1*Y)-mean(w0*Y)
#Mu hat: normalized estimator of the ATE
sum(W1*Y)-sum(W0*Y)
@

We may also use regression methods: the weighted least squares slope on $Z$ reproduces the normalized IPW estimator, although the model-based standard errors do not account for the weighting.

<<>>=
#Unadjusted
coef(summary(lm(Y~Z)))
#IPW
coef(summary(lm(Y~Z,weights=w)))
@

<<>>=
#We may also use a re-weighted pseudo-sample
id<-sample(1:n,size=n,prob=w,rep=T)   #Resample an unconfounded data set
coef(summary(lm(Y[id]~Z[id])))
@

Finally, a Monte Carlo study compares the repeated-sampling behaviour of the three estimators:

<<>>=
#Monte Carlo study
nreps<-2000
ests<-matrix(0,nrow=nreps,ncol=3)
for(irep in 1:nreps){
  X<-rmvn(n,mu=Mu,Sigma)
  Xal<-cbind(1,X)
  ps.true<-expit(Xal %*% al)
  Z<-rbinom(n,1,ps.true)
  Xm<-cbind(1,Z,X)
  muval<-as.vector(Xm %*% be)
  Y<-rnorm(n,muval,sig)
  eX<-ps.true
  w<-(1-Z)/(1-eX)+Z/eX
  w0<-(1-Z)/(1-eX)
  W0<-w0/sum(w0)
  w1<-Z/eX
  W1<-w1/sum(w1)
  ests[irep,1]<-mean(w1*Y)-mean(w0*Y)   #unnormalized estimator
  ests[irep,2]<-sum(W1*Y)-sum(W0*Y)     #normalized estimator
  id<-sample(1:n,size=n,prob=w,rep=T)
  ests[irep,3]<-coef(lm(Y[id]~Z[id]))[2] #pseudo-sample estimator
}
@

The dashed line in the boxplot marks the true ATE of 1; the final two computations report the $\sqrt{n}$-scaled bias and $n$-scaled variance of each estimator.

<<>>=
par(mar=c(4,4,2,0))
boxplot(ests,ylim=c(-2.5,2.5),
        names=c(expression(tilde(mu)),expression(hat(mu)),
                expression(paste('Pseudo ',hat(mu)))))
abline(h=1,lty=2,col='red')
#Bias (centred at the true ATE, scaled by sqrt(n))
sqrt(n)*apply(ests-1,2,mean)
#Variance (scaled by n)
n*apply(ests,2,var)
@

\end{document}