\documentclass[notitlepage]{article} \usepackage{Math533} \usepackage{listings} \usepackage{numprint} \usepackage{amsmath} \usepackage{bbm} \lstloadlanguages{R} \lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}} \makeatletter \renewcommand\section{\@startsection {section}{1}{\z@}% {-3.5ex \@plus -1ex \@minus -.2ex}% {2.3ex \@plus.2ex}% {\normalfont\LARGE\bfseries}}% from \Large \renewcommand\subsection{\@startsection{subsection}{2}{\z@}% {-3.25ex\@plus -1ex \@minus -.2ex}% {1.5ex \@plus .2ex}% {\normalfont\Large\bfseries}}% from \large \renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}% {-3.25ex\@plus -1ex \@minus -.2ex}% {1.5ex \@plus .2ex}% {\normalfont\large\bfseries}}% from \normalsize \renewcommand\thesection{\@arabic\c@section.} \renewcommand\thesubsection{\thesection\@arabic\c@subsection.} \renewcommand\thesubsubsection{\thesubsection\@arabic\c@subsubsection.} \makeatother \begin{document} <>= library(knitr) # global chunk options opts_chunk$set(cache=TRUE, autodep=TRUE) options(scipen=999) @ \title{Analysis of the NHANES data using propensity score adjustment} \maketitle \begin{abstract} The file contains the analysis of the NHANES data from R using various propensity score adjustment methods. \end{abstract} \section{Data} <>= rm(list=ls()) file.remove(list.files(pattern='.pdf')) list.of.packages <- c("NHANES", "tableone", "Matching", "MatchIt","survey", "twang","SuperLearner","glmnet","polspline","randomForest","SIS","Hmisc") new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] if(length(new.packages)) install.packages(new.packages,verbose=FALSE) library(NHANES,verbose=FALSE) library(SuperLearner,verbose=FALSE) library(nnet,verbose=FALSE) library(nnls,verbose=FALSE) library(glmnet,verbose=FALSE) library(polspline,verbose=FALSE) library(randomForest,verbose=FALSE) library(SIS,verbose=FALSE) library(twang,verbose=FALSE) library(tableone,verbose=FALSE) library(survey,verbose=FALSE) library(Matching,verbose=FALSE) library(MatchIt,verbose=FALSE) library(Hmisc,verbose=FALSE) @ <>= set.seed(37) NHANES$SmokeNow <- as.numeric(NHANES$SmokeNow)-1 small.nhanes <- na.omit(NHANES[NHANES$SurveyYr=="2011_12" & NHANES$Age > 17,c(3,4,8:11,13,25,61)]) dim(small.nhanes) names(small.nhanes) @ <>= temp.mat <- model.matrix(~Gender*Age+Gender*Race3+Gender*Education+Gender*MaritalStatus+ Gender*HHIncome+Gender*Poverty+Age*Race3+Age*Education+Age*MaritalStatus+ Age*HHIncome+Age*Poverty+Race3*Education+Race3*MaritalStatus+Race3*HHIncome+ Race3*Poverty+Education*MaritalStatus+Education*HHIncome+Education*Poverty+ MaritalStatus*HHIncome+MaritalStatus*Poverty+HHIncome*Poverty,data=small.nhanes) interact.data <- model.matrix(~Gender*Age+Gender*Race3+Gender*Education+Gender*MaritalStatus+ Gender*HHIncome+Gender*Poverty+Age*Race3+Age*Education+Age*MaritalStatus+ Age*HHIncome+Age*Poverty+Race3*Poverty+Education*Poverty+MaritalStatus*Poverty+ HHIncome*Poverty,data=small.nhanes) interact.data <- data.frame(interact.data) interact.data$SmokeNow <- small.nhanes$SmokeNow vars.interact <- colnames(interact.data)[30:107] @ <>= vars <- c("Gender", "Age", "Race3", "Education", "MaritalStatus", "Poverty") tabUnmatched <- CreateTableOne(vars = vars, strata = "SmokeNow", data = small.nhanes, test = FALSE) print(tabUnmatched, smd = TRUE) temp0 <- Ecdf(small.nhanes$Age[small.nhanes$SmokeNow==0],pl=F) temp1 <- Ecdf(small.nhanes$Age[small.nhanes$SmokeNow==1],pl=F) par(mar=c(2,3,2,1)) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) title('Cumulative distribution of Age by treatment group') legend(20,1,c('SmokeNow=0','SmokeNow=1'),col=c('black','red'),lty=1,lwd=2) ########## ## on interactions? SMDinteract <- CreateTableOne(vars = vars.interact, strata = "SmokeNow", data=interact.data,test = FALSE) summary(ExtractSmd(SMDinteract)) @ \section{Logistic regression} <>= ps.mod <- glm(SmokeNow~Gender+Age+Race3+Education+MaritalStatus+HHIncome+Poverty, data=small.nhanes,family="binomial") ps.lr <- predict(ps.mod,type="response") small.nhanes$ps.lr <- ps.lr (quints <- c(0,quantile(ps.lr,seq(.2,1,.2)))) summary(ps.lr) Smoke<-small.nhanes$SmokeNow par(mar=c(2,3,2,1)) boxplot(ps.lr[Smoke==0],ps.lr[Smoke==1],main='PS Quintiles', ylab="PS",xlab="Treatment Group",names=c(0,1),ylim=range(0,1),col='gray') abline(h=quints[2:5],col="red") small.nhanes$ps.lr <- ps.lr rbind(table(cut(ps.lr[small.nhanes$SmokeNow==0],quints)), table(cut(ps.lr[small.nhanes$SmokeNow==1],quints))) @ <>= ps.lr.quints <- cut(ps.lr,quints,labels=1:5) small.nhanes$ps.lr.quints <- ps.lr.quints par(mfrow=c(2,3),mar=c(4,4,2,1)) for(j in 1:5) { nonsmj <- small.nhanes$Age[small.nhanes$SmokeNow==0 & small.nhanes$ps.lr.quints==j] temp0 <- Ecdf(nonsmj,pl=FALSE) smj <- small.nhanes$Age[small.nhanes$SmokeNow==1 & small.nhanes$ps.lr.quints==j] temp1 <- Ecdf(smj,pl=FALSE) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) } SMD.table <- ExtractSmd(tabUnmatched) for(j in 1:5) { tabPSquints <- CreateTableOne(vars = vars, strata = "SmokeNow", data = small.nhanes[ps.lr.quints==j,], test = FALSE) SMD.table <- cbind(SMD.table,ExtractSmd(tabPSquints)) } round(SMD.table,3) Max.SMD <- max(SMD.table);Mean.SMD <- mean(SMD.table); Med.SMD <- median(SMD.table) @ <>= boxplot(ps.lr[small.nhanes$SmokeNow==0],ps.lr[small.nhanes$SmokeNow==1],ylim=range(0,1), ylab="PS",xlab="Treatment Group",names=c(0,1),col='gray') (dec <- c(0,quantile(ps.lr,seq(.1,1,.1)))) abline(h=dec[2:10],col="red");title('PS Deciles') rbind(table(cut(ps.lr[Smoke==0],dec)),table(cut(ps.lr[Smoke==1],dec))) ps.lr.dec <- cut(ps.lr,dec,labels=1:10) SMD.10.table <- ExtractSmd(tabUnmatched) for(j in 1:10) { tabPSdec <- CreateTableOne(vars = vars, strata = "SmokeNow", data = small.nhanes[ps.lr.dec==j,], test = FALSE) SMD.10.table <- cbind(SMD.10.table,ExtractSmd(tabPSdec)) } round(SMD.10.table,2) @ \section{Inverse Probability Weighting} <>= ps.lr.weight <-Smoke/ps.lr + (1-Smoke)/(1-ps.lr) temp0 <- Ecdf(small.nhanes$Age[Smoke==0],weights=ps.lr.weight[Smoke==0],pl=F) temp1 <- Ecdf(small.nhanes$Age[Smoke==1],weights=ps.lr.weight[Smoke==1],pl=F) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age", main="Cumulative distribution of Age by treatment group",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) legend(20,1,c('SmokeNow=0','SmokeNow=1'),col=c('black','red'),lty=1,lwd=2) nhanes.IPW.lr <- svydesign(ids=~0, data=small.nhanes, weights=ps.lr.weight) tabIPW <- svyCreateTableOne(vars = vars, strata = "SmokeNow", data = nhanes.IPW.lr, test = FALSE) print(tabIPW, smd = TRUE) round(cbind(SMD.table,ExtractSmd(tabIPW)),3) @ <>= interact.data$ps.lr.weight <- ps.lr.weight nhanes.IPW.lr.interact <- svydesign(ids=~0, data=interact.data, weights=ps.lr.weight) SMDinteract.IPW.a <- svyCreateTableOne(vars = vars.interact[1:25], strata = "SmokeNow", data = nhanes.IPW.lr.interact, test = FALSE) SMDinteract.IPW.b <- svyCreateTableOne(vars = vars.interact[26:50], strata = "SmokeNow", data = nhanes.IPW.lr.interact, test = FALSE) SMDinteract.IPW.c <- svyCreateTableOne(vars = vars.interact[51:78], strata = "SmokeNow", data = nhanes.IPW.lr.interact, test = FALSE) SMDinteract.IPW <- c(ExtractSmd(SMDinteract.IPW.a),ExtractSmd(SMDinteract.IPW.b), ExtractSmd(SMDinteract.IPW.c)) summary(SMDinteract.IPW) interact.table <- cbind(ExtractSmd(SMDinteract),SMDinteract.IPW) par(mar=c(3,3,2,1)) boxplot(cbind(ExtractSmd(SMDinteract),SMDinteract.IPW),col='gray', ylim=range(0,1),names=c("Unadjusted","IPW: LR")) @ \section{Matching} <>= ## Matching ps.lr.match <- Match(Tr=small.nhanes$SmokeNow,X=small.nhanes$ps.lr,ties=FALSE) matched.samp <- small.nhanes[c(ps.lr.match$index.control,ps.lr.match$index.treated),] table(table(c(ps.lr.match$index.control,ps.lr.match$index.treated))) temp0 <- Ecdf(matched.samp$Age[matched.samp$SmokeNow==0],pl=F) temp1 <- Ecdf(matched.samp$Age[matched.samp$SmokeNow==1],pl=F) par(mar=c(3,3,2,1)) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="Cumulative distribution of Age by treatment group",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) legend(20,1,c('SmokeNow=0','SmokeNow=1'),col=c('black','red'),lwd=2) ##Summarize balance after matching: ##Not run #MatchBalance(SmokeNow~Gender+Age+Race3+Education+MaritalStatus+HHIncome+Poverty, # data=small.nhanes,match.out=ps.lr.match) tabMatched <- CreateTableOne(vars = vars, strata = "SmokeNow", data = matched.samp, test = FALSE) round(cbind(SMD.table,ExtractSmd(tabMatched),ExtractSmd(tabIPW)),3) Max.SMD <- c(NA,Max.SMD,max(ExtractSmd(tabMatched)),NA,max(ExtractSmd(tabIPW))) Mean.SMD <- c(NA,Mean.SMD,mean(ExtractSmd(tabMatched)),NA,mean(ExtractSmd(tabIPW))) Med.SMD <- c(NA,Med.SMD,median(ExtractSmd(tabMatched)),NA,median(ExtractSmd(tabIPW))) ps.SMDtable <- round(cbind(SMD.table,ExtractSmd(tabMatched),ExtractSmd(tabIPW)),3) @ \section{Generalized Boosting} <>= ## GBM with twang library gbm.fit <- ps(SmokeNow~Gender+Age+Race3+Education+MaritalStatus+HHIncome+Poverty, estimand = "ATE", data=as.data.frame(small.nhanes),verbose=FALSE) ps.gbm <- gbm.fit$ps$ks.mean.ATE;small.nhanes$ps.gbm <- ps.gbm summary(ps.gbm) par(mar=c(3,3,2,1)) boxplot(ps.gbm[small.nhanes$SmokeNow==0],ps.gbm[small.nhanes$SmokeNow==1],col='gray',ylim=range(0,1)) par(mar=c(3,3,2,1),pty='s') plot(ps.lr,ps.gbm,pch=19,xlim=range(0,1),ylim=range(0,1)) ## correlate pretty well points(ps.lr[small.nhanes$SmokeNow==1],ps.gbm[small.nhanes$SmokeNow==1],col="red") ## correlate very badly!! par(mar=c(3,3,2,1)) boxplot(ps.gbm[small.nhanes$SmokeNow==0],ps.gbm[small.nhanes$SmokeNow==1],ylim=range(0,1), ylab="PS (GBM)",xlab="Treatment Group",names=c(0,1),col='gray') (quints.gbm <- c(0,quantile(ps.gbm,seq(.2,1,.2)))) abline(h=quints.gbm[2:5],col="red") small.nhanes$ps.gbm <- ps.gbm rbind(table(cut(ps.gbm[small.nhanes$SmokeNow==0],quints.gbm)), table(cut(ps.gbm[small.nhanes$SmokeNow==1],quints.gbm))) ps.gbm.weight <- small.nhanes$SmokeNow/ps.gbm + (1-small.nhanes$SmokeNow)/(1-ps.gbm) nhanes.IPW.gbm <- svydesign(ids=~0, data=small.nhanes, weights=ps.gbm.weight) tabIPW.gbm <- svyCreateTableOne(vars = vars, strata = "SmokeNow", data = nhanes.IPW.gbm, test = FALSE) round(cbind(ExtractSmd(tabUnmatched),ExtractSmd(tabIPW),ExtractSmd(tabIPW.gbm)),3) ########## ## on interactions? interact.data$ps.gbm.weight <- ps.gbm.weight nhanes.IPW.gbm.interact <- svydesign(ids=~0, data=interact.data, weights=ps.gbm.weight) SMDinteract.IPW.gbm.a <- svyCreateTableOne(vars = vars.interact[1:25], strata = "SmokeNow", data = nhanes.IPW.gbm.interact, test = FALSE) SMDinteract.IPW.gbm.b <- svyCreateTableOne(vars = vars.interact[26:50], strata = "SmokeNow", data = nhanes.IPW.gbm.interact, test = FALSE) SMDinteract.IPW.gbm.c <- svyCreateTableOne(vars = vars.interact[51:78], strata = "SmokeNow", data = nhanes.IPW.gbm.interact, test = FALSE) SMDinteract.IPW.gbm <- c(ExtractSmd(SMDinteract.IPW.gbm.a),ExtractSmd(SMDinteract.IPW.gbm.b), ExtractSmd(SMDinteract.IPW.gbm.c)) summary(SMDinteract.IPW.gbm) interact.table <- cbind(interact.table,SMDinteract.IPW.gbm) boxplot(interact.table,col='gray',names=c("Unadjusted","IPW: LR", "IPW: GBM")) ps.gbm.quints <- cut(ps.gbm,quints.gbm,labels=1:5) SMD.gbm.table <- NULL for(j in 1:5) { tabPSquints.gbm <- CreateTableOne(vars = vars, strata = "SmokeNow", data = small.nhanes[ps.gbm.quints==j,], test = FALSE) SMD.gbm.table <- cbind(SMD.gbm.table,ExtractSmd(tabPSquints.gbm)) } round(SMD.gbm.table,3) ps.gbm.match <- Match(Tr=small.nhanes$SmokeNow,X=small.nhanes$ps.gbm,estimand="ATE",ties=FALSE) matched.gbm.samp <- small.nhanes[c(ps.gbm.match$index.control,ps.gbm.match$index.treated),] #Not run #MatchBalance(SmokeNow~Gender+Age+Race3+Education+MaritalStatus+HHIncome+Poverty, # data=small.nhanes,match.out=ps.gbm.match) tabMatched.gbm <- CreateTableOne(vars = vars, strata = "SmokeNow", data = matched.gbm.samp, test = FALSE) temp0 <- Ecdf(matched.gbm.samp$Age[matched.gbm.samp$SmokeNow==0],pl=F) temp1 <- Ecdf(matched.gbm.samp$Age[matched.gbm.samp$SmokeNow==1],pl=F) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) legend(20,1,c('SmokeNow=0','SmokeNow=1'),col=c('black','red'),lty=1,lwd=2) temp0<-Ecdf(small.nhanes$Age[Smoke==0],weights=ps.gbm.weight[Smoke==0],pl=F) temp1<-Ecdf(small.nhanes$Age[Smoke==1],weights=ps.gbm.weight[Smoke==1],pl=F) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) legend(20,1,c('SmokeNow=0','SmokeNow=1'),col=c('black','red'),lty=1,lwd=2) SMD.gbm.table <- cbind(SMD.gbm.table,ExtractSmd(tabMatched.gbm),ExtractSmd(tabIPW.gbm)) round(SMD.gbm.table,3) @ \section{Super Learner} <>= ## SuperLearner X.mat <- data.frame(cbind(small.nhanes$Gender,small.nhanes$Age,small.nhanes$Race3, small.nhanes$Education,small.nhanes$MaritalStatus,small.nhanes$HHIncome,small.nhanes$Poverty)) my.library <- c("SL.knn","SL.randomForest","SL.glmnet","SL.mean") SL.fit <- SuperLearner(Y = small.nhanes$SmokeNow, X = X.mat, SL.library = my.library,verbose = FALSE, method ="method.NNLS", family=binomial()) small.nhanes$ps.SL <- ps.SL <- SL.fit$SL.predict mean(as.numeric(ps.SL > 0.5) == Smoke) par(mar=c(2,3,2,1)) boxplot(ps.SL[Smoke==0],ps.SL[Smoke==1],ylab="PS (SL)",xlab="Treatment Group",names=c(0,1),col='gray') (quints.SL <- c(0,quantile(ps.SL,seq(.2,1,.2)))) abline(h=quints.SL[2:5],col="red",lwd=2) @ <>= par(mar=c(2,3,2,1),mfrow=c(1,2),pty='s') plot(ps.lr,ps.SL,pch=19) ## correlate very badly!! points(ps.lr[Smoke==1],ps.SL[Smoke==1],col="red",pch=19) ## correlate very badly!! plot(ps.gbm,ps.SL,pch=19) ## correlate badly points(ps.gbm[Smoke==1],ps.SL[Smoke==1],col="red",pch=19) ## correlate very badly!! @ <>= rbind(table(cut(ps.SL[Smoke==0],quints.SL)), table(cut(ps.SL[Smoke==1],quints.SL))) ps.SL[ps.SL==0] <- min(ps.SL[ps.SL!=0]) ps.SL.weight <- Smoke/ps.SL + (1-Smoke)/(1-ps.SL) nhanes.IPW.SL <- svydesign(ids=~0, data=small.nhanes, weights=ps.SL.weight) tabIPW.SL <- svyCreateTableOne(vars = vars, strata = "SmokeNow", data = nhanes.IPW.SL, test = FALSE) ExtractSmd(tabIPW.SL) ps.SL.quints <- cut(ps.SL,quints.SL,labels=1:5) SMD.SL.table <- NULL for(j in 3) { tabPSquints.SL <- CreateTableOne(vars = vars, strata = "SmokeNow", data = small.nhanes[ps.SL.quints==j,], test = FALSE) SMD.SL.table <- cbind(SMD.SL.table,ExtractSmd(tabPSquints.SL)) } round(SMD.SL.table,3) @ <>= ps.SL.match <- Match(Tr=small.nhanes$SmokeNow,X=small.nhanes$ps.SL,estimand="ATE",ties=FALSE) matched.SL.samp <- small.nhanes[c(ps.SL.match$index.control,ps.SL.match$index.treated),] ##Not run #MatchBalance(SmokeNow~Gender+Age+Race3+Education+MaritalStatus+HHIncome+Poverty, # data=small.nhanes,match.out=ps.SL.match) tabMatched.SL <- CreateTableOne(vars = vars, strata = "SmokeNow",data = matched.SL.samp, test = FALSE) temp0 <- Ecdf(matched.SL.samp$Age[matched.SL.samp$SmokeNow==0],pl=F) temp1 <- Ecdf(matched.SL.samp$Age[matched.SL.samp$SmokeNow==1],pl=F) par(mar=c(2,3,2,1)) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) legend(20,1,c('SmokeNow=0','SmokeNow=1'),col=c('black','red'),lty=1,lwd=2) temp0 <- Ecdf(small.nhanes$Age[Smoke==0],weights=ps.SL.weight[Smoke==0],pl=F) temp1 <- Ecdf(small.nhanes$Age[Smoke==1],weights=ps.SL.weight[Smoke==1],pl=F) par(mar=c(2,3,2,1)) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) SMD.SL.table <- cbind(SMD.SL.table,ExtractSmd(tabMatched.SL),ExtractSmd(tabIPW.SL)) round(SMD.SL.table,3) round(cbind(ExtractSmd(tabUnmatched),ExtractSmd(tabIPW), ExtractSmd(tabIPW.gbm),ExtractSmd(tabIPW.SL)),3) ########## ## Balance on interactions? interact.data$ps.SL.weight <- ps.SL.weight nhanes.IPW.SL.interact <- svydesign(ids=~0, data=interact.data, weights=ps.SL.weight) SMDinteract.IPW.SL.a <- svyCreateTableOne(vars = vars.interact[1:25], strata = "SmokeNow", data = nhanes.IPW.SL.interact, test = FALSE) SMDinteract.IPW.SL.b <- svyCreateTableOne(vars = vars.interact[26:50], strata = "SmokeNow", data = nhanes.IPW.SL.interact, test = FALSE) SMDinteract.IPW.SL.c <- svyCreateTableOne(vars = vars.interact[51:78], strata = "SmokeNow", data = nhanes.IPW.SL.interact, test = FALSE) SMDinteract.IPW.SL <- c(ExtractSmd(SMDinteract.IPW.SL.a),ExtractSmd(SMDinteract.IPW.SL.b), ExtractSmd(SMDinteract.IPW.SL.c)) summary(SMDinteract.IPW.SL) interact.table <- cbind(interact.table,SMDinteract.IPW.SL) par(mar=c(2,3,2,1)) boxplot(interact.table,xlab="Approach", names=c("Unadjusted","IPW: LR", "IPW: GBM", "IPW: SL"),col='gray') title('Balance on Interactions') @ \section{ATE Estimation} <>= # Naive: simple model coef(summary(lm(BPSysAve~SmokeNow,data=small.nhanes)))[2,] ## -3.68 #Regression coef(summary(lm(BPSysAve~SmokeNow+Gender+Age+Race3+ Education+MaritalStatus+HHIncome+Poverty,data=small.nhanes)))[2,] ## -1.10 #PS regression: quintiles coef(summary(lm(BPSysAve~SmokeNow+ps.lr.quints,data=small.nhanes)))[2,] ## -1.41 #PS Regression coef(summary(lm(BPSysAve~SmokeNow+ps.lr,data=small.nhanes)))[2,] ## -1.11 #PS Regression with quadratic term coef(summary(lm(BPSysAve~SmokeNow+ps.lr+I(ps.lr^2),data=small.nhanes)))[2,] ## -1.11 #IPW coef(summary(lm(BPSysAve~SmokeNow,weights=ps.lr.weight,data=small.nhanes)))[2,] ## -1.99 #Matching matched.anal <- Match(Y=small.nhanes$BPSysAve, Tr=small.nhanes$SmokeNow, X=X.mat, estimand = "ATE", ties=FALSE) matched.anal <- Match(Y=small.nhanes$BPSysAve, Tr=small.nhanes$SmokeNow, X=ps.lr, estimand = "ATE", ties=FALSE) summary(matched.anal) # Note that this value changes from match to match due to randomness in matching algorithm @ <>= nhanes.allsmoke <- small.nhanes nhanes.allsmoke$SmokeNow <- 1 nhanes.nosmoke <- small.nhanes nhanes.nosmoke$SmokeNow <- 0 all.ests <- NULL ## Regression coef(lm(BPSysAve~SmokeNow,data=small.nhanes))[2] coef(lm(BPSysAve~SmokeNow+Gender+Age+Race3+Education+MaritalStatus+HHIncome+Poverty, data=small.nhanes))[2] ## ATE via regression mod1.lm <- lm(BPSysAve~SmokeNow+Gender+Age+Race3+Education+MaritalStatus+HHIncome+Poverty, data=small.nhanes) APO.lm.1 <- mean(predict(mod1.lm,nhanes.allsmoke)) APO.lm.0 <- mean(predict(mod1.lm,nhanes.nosmoke)) APO.lm.1 - APO.lm.0 all.ests <- c(all.ests,APO.lm.1 - APO.lm.0) mod1.lmX <- lm(BPSysAve~SmokeNow+Gender+Age+Race3+Education+MaritalStatus+HHIncome+Poverty+ SmokeNow:HHIncome+SmokeNow:Gender+SmokeNow:Age,data=small.nhanes) APO.lmX.1 <- mean(predict(mod1.lmX,nhanes.allsmoke)) APO.lmX.0 <- mean(predict(mod1.lmX,nhanes.nosmoke)) APO.lmX.1 - APO.lmX.0 ## ATE via PS stratification ps.lr.quints <- cut(ps.lr,quints,labels=1:5) sum(table(cut(ps.lr,quints))) summary(ps.lr.quints) small.nhanes$ps.lr.quints <- ps.lr.quints p.strat <- table(ps.lr.quints)/length(ps.lr.quints) p.strat ATE.strat <- rep(NA,5) for(j in 1:5) { ATE.strat[j] <- mean(small.nhanes$BPSysAve[Smoke == 1 & small.nhanes$ps.lr.quints==j]) - mean(small.nhanes$BPSysAve[Smoke == 0 & small.nhanes$ps.lr.quints==j]) } ATE.strat sum(ATE.strat*p.strat) all.ests <- c(all.ests,sum(ATE.strat*p.strat)) ## ATE via matching mean(matched.samp$BPSysAve[matched.samp$SmokeNow == 1]) - mean(matched.samp$BPSysAve[matched.samp$SmokeNow == 0]) all.ests <- c(all.ests,mean(matched.samp$BPSysAve[matched.samp$SmokeNow == 1]) - mean(matched.samp$BPSysAve[matched.samp$SmokeNow == 0])) ## ATE via PS regression mod1.PSlm1 <- lm(BPSysAve~SmokeNow+ps.lr,data=small.nhanes) APO.PSlm1.1 <- mean(predict(mod1.PSlm1,nhanes.allsmoke)) APO.PSlm1.0 <- mean(predict(mod1.PSlm1,nhanes.nosmoke)) APO.PSlm1.1 - APO.PSlm1.0 mod1.PSlm2 <- lm(BPSysAve~SmokeNow+ps.lr+I(ps.lr^2),data=small.nhanes) APO.PSlm2.1 <- mean(predict(mod1.PSlm2,nhanes.allsmoke)) APO.PSlm2.0 <- mean(predict(mod1.PSlm2,nhanes.nosmoke)) APO.PSlm2.1 - APO.PSlm2.0 mod1.PSlm3 <- lm(BPSysAve~SmokeNow+bs(ps.lr,df=4),data=small.nhanes) APO.PSlm3.1 <- mean(predict(mod1.PSlm3,nhanes.allsmoke)) APO.PSlm3.0 <- mean(predict(mod1.PSlm3,nhanes.nosmoke)) APO.PSlm3.1 - APO.PSlm3.0 all.ests <- c(all.ests,APO.PSlm3.1 - APO.PSlm3.0) ## ATE via IPW small.nhanes$ps.lr.weight <- Smoke/small.nhanes$ps.lr + (1-Smoke)/(1-small.nhanes$ps.lr) IPW.est<-mean(Smoke*small.nhanes$BPSysAve*small.nhanes$ps.lr.weight) - mean((1-Smoke)*small.nhanes$BPSysAve*small.nhanes$ps.lr.weight) all.ests <- c(all.ests,IPW.est) coef(lm(BPSysAve ~ SmokeNow, weights = ps.lr.weight,data=small.nhanes)) mean(Smoke*small.nhanes$BPSysAve/small.nhanes$ps.lr) - mean((1-Smoke)*small.nhanes$BPSysAve/(1-small.nhanes$ps.lr)) # Final table: round(rbind(Max.SMD, Mean.SMD, Med.SMD, all.ests), 3) @ \section{ATT Estimation} <>= matched.anal.ATT <- Match(Y=small.nhanes$BPSysAve, Tr=small.nhanes$SmokeNow, X=ps.lr, estimand = "ATT", ties=FALSE) summary(matched.anal.ATT) matched.samp.ATT <- small.nhanes[c(matched.anal.ATT$index.control,matched.anal.ATT$index.treated),] mean(matched.samp.ATT$BPSysAve[matched.samp.ATT$SmokeNow == 1]) - mean(matched.samp.ATT$BPSysAve[matched.samp.ATT$SmokeNow == 0]) temp0 <- Ecdf(matched.samp.ATT$Age[matched.samp.ATT$SmokeNow==0],pl=F) temp1 <- Ecdf(matched.samp.ATT$Age[matched.samp.ATT$SmokeNow==1],pl=F) par(mar=c(4,4,2,1)) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) ATT.match <- CreateTableOne(vars = vars, strata = "SmokeNow", data = matched.samp.ATT, test = FALSE) SMD.ATT <- ExtractSmd(ATT.match) ## ATT via IPW small.nhanes$ATT.lr.weight <- Smoke + (1-Smoke)*ps.lr/(1-ps.lr) temp0 <- Ecdf(small.nhanes$Age[Smoke==0],weights=small.nhanes$ATT.lr.weight[Smoke==0],pl=F) temp1 <- Ecdf(small.nhanes$Age[Smoke==1],weights=small.nhanes$ATT.lr.weight[Smoke==1],pl=F) plot(temp0$x,temp0$y,ylab="ECDF(Age)",xlab="Age",main="",type="l",lwd=2) lines(temp1$x,temp1$y,col="red",lwd=2) nhanes.ATT.IPW <- svydesign(ids=~0, data=small.nhanes, weights=small.nhanes$ATT.lr.weight) ATT.IPW <- svyCreateTableOne(vars = vars, strata = "SmokeNow", data = nhanes.ATT.IPW, test = FALSE) print(ATT.IPW, smd = TRUE) round(cbind(SMD.ATT,ExtractSmd(ATT.IPW)),3) mean(Smoke*small.nhanes$BPSysAve*small.nhanes$ATT.lr.weight) - mean((1-Smoke)*small.nhanes$BPSysAve*small.nhanes$ATT.lr.weight) @ \end{document}