\documentclass[notitlepage]{article}
\usepackage{Math}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
%\usepackage{bbm}
\usepackage{bbm}
\usepackage[all]{xy}
\usepackage{verbatim}
\usepackage{mathtools}
\usepackage{dsfont}
\DeclareMathAlphabet{\mathpzc}{OT1}{pzc}{m}{it}
\makeatletter
\newcommand{\raisemath}[1]{\mathpalette{\raisem@th{#1}}}
\newcommand{\raisem@th}[3]{\raisebox{#1}{$#2#3$}}
\makeatother
\usepackage{amsfonts,amsmath,amssymb}
\newtheorem{alg}{Algorithm}
\newtheorem{ex}{Example}
\newtheorem{note}{Note}
\newenvironment{proof}{\par\noindent{\normalfont{\bf{Proof }}}}
{\hfill \small$\blacksquare$\\[2mm]}
\setlength{\parindent}{0in}
\def\E{\mathbbmss{E}}
\def\Var{\text{Var}}
\def\Cov{\text{Cov}}
\def\Corr{\text{Corr}}
\def\bW{\mathbf{W}}
\def\bH{\mathbf{H}}
\newcommand{\bs}[1]{\bm{#1}}
\newcommand{\Rho}{\mathrm{P}}
\newcommand{\cif}{\text{ if }}
\newcommand{\Ra}{\Longrightarrow}
\newcommand{\ra}{\longrightarrow}
\def\bphi{\boldsymbol \phi}
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}
\def\Xb{\mathbf{X}_{\beta}}
\def\Xp{\mathbf{X}_{\psi}}
\def\beps{\boldsymbol{\epsilon}}
\def\betahat{\widehat \beta}
\def\psihat{\widehat \psi}
\usepackage{tikz}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}
\usepackage{pgfplots}
\pgfplotsset{compat=1.17}
\begin{document}
<<>>=
library(knitr)
# global chunk options
opts_chunk$set(cache=TRUE, autodep=TRUE)
options(scipen=999)
options(repos=c(CRAN="https://cloud.r-project.org/"))
knit_hooks$set(document = function(x) {sub('\\usepackage[]{color}', '\\usepackage{xcolor}', x, fixed = TRUE)})
@

\begin{center}
{\textsclarge{Optimal Dynamic Treatment Rules}}
\end{center}

For treatment \emph{sequence} $(\rza{1},\rza{2})$, we seek the \emph{optimal} treatment sequence $(z_1^\opt,z_2^\opt)$ that \emph{maximizes} the expected response. We decompose the expected (counterfactual) response as
\begin{align*}
\E[Y(\rza{1},\rza{2})] &= \E[Y(z_1^\opt,z_2^\opt)] - \left\{\E[Y(z_1^\opt,z_2^\opt)-Y(\rza{1},z_2^\opt)] \right\} - \left\{\E[Y(\rza{1},z_2^\opt)-Y(\rza{1},\rza{2})] \right\}.
\end{align*}
The terms in the decomposition on the right-hand side are
\begin{itemize}
\item first term: the response under optimal treatment at both stages;
\item second term: the penalty for suboptimal treatment at the first stage, assuming optimal treatment at the second stage;
\item third term: the penalty for suboptimal treatment at the second stage, assuming the observed treatment at the first stage.
\end{itemize}
This decomposition is the basis of the \emph{Structural Nested Mean Model} (SNMM).
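The decomposition also explains why the two bracketed terms are interpreted as penalties (or \emph{regrets}): since $(z_1^\opt,z_2^\opt)$ is the response-maximizing sequence, both terms are non-negative, and hence
\[
\E[Y(\rza{1},\rza{2})] \leq \E[Y(z_1^\opt,z_2^\opt)],
\]
with equality when treatment is optimal at both stages.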
Further, we can re-write the second and third terms by comparing to the baseline level (zero):
\begin{align*}
\E[Y(z_1^\opt,z_2^\opt)-Y(\rza{1},z_2^\opt)] & = \E[Y(z_1^\opt,z_2^\opt)-Y(0,z_2^\opt)] - \E[Y(\rza{1},z_2^\opt)-Y(0,z_2^\opt)] \\[12pt]
\E[Y(\rza{1},z_2^\opt)-Y(\rza{1},\rza{2})] & = \E[Y(\rza{1},z_2^\opt)-Y(\rza{1},0)] - \E[Y(\rza{1},\rza{2})-Y(\rza{1},0)]
\end{align*}
We then specify \textit{blip} models
\begin{align*}
\E[Y(\rza{1},\rza{2})-Y(\rza{1},0)] & = (\bx_2 \psi_2) \rza{2} & \text{Stage 2} \\[6pt]
\E[Y(\rza{1},z_2^\opt)-Y(0,z_2^\opt)] & = (\bx_1 \psi_1) \rza{1} & \text{Stage 1}
\end{align*}
where
\begin{itemize}
\item $\bx_2$ can depend on all data -- including observed treatments -- observed up to Stage 2.
\item $\bx_1$ can depend on all data observed up to Stage 1.
\end{itemize}
Estimation then proceeds backwards through the stages:
\begin{enumerate}[1.]
\item Use \emph{G-estimation} on $Y$, conditioning on $(\bx_1,z_1,\bx_2,z_2)$, at the second stage to estimate $\psi_2$ using the mean model
\[
Y = \bx_{21} \beta_2 + z_2 \bx_{22} \psi_2 + \varepsilon
\]
\item For each individual, infer the optimal Stage 2 treatment: $z_2^\opt = \Ind\{\bx_{22} \widehat \psi_2 > 0\}$.
\item Form the \emph{pseudo-outcome} $Y_1$
\[
Y_1 = Y + (\bx_{22} \widehat \psi_2) (z_2^\opt - z_2)
\]
which adjusts the observed outcome to the level it would have attained under optimal Stage 2 treatment.
\item Use G-estimation on $Y_1$, conditioning on $(\bx_1,z_1)$, at the first stage to estimate $\psi_1$ using a proposed mean model
\[
Y_1 = \bx_{11} \beta_1 + z_1 \bx_{12} \psi_1 + \varepsilon
\]
\item For each individual, infer the optimal Stage 1 treatment: $z_1^\opt = \Ind\{\bx_{12} \widehat \psi_1 > 0\}$.
\end{enumerate}
The method is robust to mis-specification of the \emph{nuisance (treatment-free) mean model}, provided the treatment (propensity) model is correctly specified. We can infer the optimized potential outcome
\[
Y + (\bx_{12} \widehat \psi_1) (z_1^\opt - z_1) + (\bx_{22} \widehat \psi_2) (z_2^\opt - z_2)
\]
which takes the observed outcome $Y$ and adds
\begin{itemize}
\item the additional benefit of optimal treatment at stage 1
\[
(\bx_{12} \widehat \psi_1) (z_1^\opt - z_1)
\]
\item the additional benefit of optimal treatment at stage 2
\[
(\bx_{22} \widehat \psi_2) (z_2^\opt - z_2).
\]
\end{itemize}
\textbf{Simulation study:} We simulate the data as follows:
\begin{itemize}
\item $Y(z_1^\opt,z_2^\opt) \sim Normal(200,5^2)$
\item $X_1 \sim Normal(1,1)$
\item $Z_1 \sim Bernoulli(\text{expit}(\alpha_{10} + \alpha_{11} X_1))$
\item $X_2|X_1,Z_1 \sim Normal(X_1+Z_1,1)$
\item $Z_2|X_1,Z_1,X_2 \sim Bernoulli(\text{expit}(\alpha_{20} + \alpha_{21} X_1+ \alpha_{22} X_2 + \alpha_{23} Z_1))$
\item Stage 1: $\psi_1 = (-2,1.5)$
\[
\bx_1 \psi_1 = \psi_{10} + \psi_{11} X_1
\]
\item Stage 2: $\psi_2 = (-2,1,1,-1)$
\[
\bx_2 \psi_2 = \psi_{20} + \psi_{21} X_1 + \psi_{22} X_2 + \psi_{23} Z_1
\]
\item Outcome:
\[
Y = Y(z_1^\opt,z_2^\opt) - (\bx_{12} \psi_1) (z_1^\opt - z_1) - (\bx_{22} \psi_2) (z_2^\opt - z_2)
\]
\end{itemize}
In the two fitting steps 1. and 4., the mean models
\[
Y = \beta_{20} + z_2 \bx_{22} \psi_2 + \varepsilon
\]
and
\[
Y_1 = \beta_{10} + z_1 \bx_{12} \psi_1 + \varepsilon
\]
are used, that is, the treatment-free components contain only an intercept. Note that these treatment-free models are mis-specified. The G-estimation steps are carried out here using propensity score regression, so the blip estimators remain consistent because the treatment models are correctly specified.
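Before presenting the full simulation, the following minimal sketch isolates a single G-estimation step carried out by propensity score regression. The helper \texttt{gest.stage} and its argument names are hypothetical (they are not part of the analysis below) and are shown only to make the structure of the two fitting steps explicit.

<<>>=
# Minimal sketch (illustration only): one stage of G-estimation via
# propensity score regression. 'gest.stage' is a hypothetical helper.
# y: outcome (or pseudo-outcome), z: binary treatment,
# Xblip: blip covariates (no intercept column), Xtreat: treatment-model covariates.
gest.stage<-function(y,z,Xblip,Xtreat){
  e<-fitted(glm(z~Xtreat,family=binomial))   #estimated propensity score
  #treatment-free part: intercept plus propensity-score terms (double robustness);
  #blip part: z and its interactions with the blip covariates
  fit<-lm(y~z+z:Xblip+e+e:Xblip)
  cf<-coef(fit)
  cf[grep("^z",names(cf))]                   #psi-hat: coefficients attached to z
}
@

With such a helper, Step 1. would correspond to \texttt{gest.stage(Y, Z2, cbind(X1,X2,Z1), cbind(X1,X2,Z1))} and Step 4. to \texttt{gest.stage(Y1, Z1, cbind(X1), cbind(X1))}; the simulation code below instead writes out the two \texttt{lm} fits explicitly.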
<<>>=
set.seed(34)
expit<-function(x){return(1/(1+exp(-x)))}
n<-1000
al1<-c(-2,1)
al2<-c(-2,3,-1,1)
psi1<-c(-2,1.5)
psi2<-c(-2,1,1,-1)
nreps<-2000
psi1.mat<-matrix(0,nrow=nreps,ncol=length(psi1))
psi2.mat<-matrix(0,nrow=nreps,ncol=length(psi2))
Y.opts<-matrix(0,nrow=nreps,ncol=n)
improved<-rep(0,nreps)
for(irep in 1:nreps){
  #Simulate data
  X1<-rnorm(n,1,1)
  l1<-al1[1]+al1[2]*X1
  Z1<-rbinom(n,1,expit(l1))
  X2<-rnorm(n)+Z1+X1
  l2<-cbind(1,X1,X2,Z1) %*% al2
  Z2<-rbinom(n,1,expit(l2))
  blip1<-psi1[1]+psi1[2]*X1
  Z1.opt<-as.numeric(blip1>0)
  blip2<-cbind(1,X1,X2,Z1) %*% psi2
  Z2.opt<-as.numeric(blip2>0)
  Y.opt<-rnorm(n,200,5)
  regret2<-(cbind(1,X1,X2,Z1) %*% psi2)*(Z2.opt-Z2)
  regret1<-(cbind(1,X1) %*% psi1)*(Z1.opt-Z1)
  Y<-Y.opt-regret1-regret2
  ##############################
  #Fit second stage
  fitz2<-glm(Z2~X1+X2+Z1,family=binomial)
  e2<-fitted(fitz2)
  #Use propensity score regression to ensure double robustness
  fity2<-lm(Y~1+(Z2+Z2:X1+Z2:X2+Z2:Z1)+(e2+e2:X1+e2:X2+e2:Z1))
  psi2.hat<-coef(fity2)[c(2,4,5,6)]
  z2.opt<-as.numeric(cbind(1,X1,X2,Z1) %*% psi2.hat >0)
  #Pseudo-outcome: add back the estimated Stage 2 regret
  Y1<-Y+(cbind(1,X1,X2,Z1) %*% psi2.hat)*(z2.opt-Z2)
  #Fit first stage
  fitz1<-glm(Z1~X1,family=binomial)
  e1<-fitted(fitz1)
  #Use propensity score regression to ensure double robustness
  fity1<-lm(Y1~1+(Z1+Z1:X1)+(e1+e1:X1))
  psi1.hat<-coef(fity1)[c(2,4)]
  z1.opt<-as.numeric(cbind(1,X1) %*% psi1.hat >0)
  psi1.mat[irep,]<-psi1.hat
  psi2.mat[irep,]<-psi2.hat
  #Estimated optimized outcome: observed outcome plus estimated regrets
  regret2<-(cbind(1,X1,X2,Z1) %*% psi2.hat)*(z2.opt-Z2)
  regret1<-(cbind(1,X1) %*% psi1.hat)*(z1.opt-Z1)
  Y.opts[irep,]<-Y+regret1+regret2
  #Number who would have improved under the estimated optimal regime
  improved[irep]<-sum(Y < Y.opts[irep,])
}
@
<<>>=
nv<-c(expression(hat(psi)[10]),expression(hat(psi)[11]))
par(mar=c(3,3,2,0))
boxplot(psi1.mat,pch=19,cex=0.5,names=nv,main='First stage parameters (2000 replicates)')
points(c(1:2),psi1,col='red',pch=20,cex=2)
nv<-c(expression(hat(psi)[20]),expression(hat(psi)[21]),
      expression(hat(psi)[22]),expression(hat(psi)[23]))
par(mar=c(3,3,2,0))
boxplot(psi2.mat,pch=19,cex=0.5,names=nv,main='Second stage parameters (2000 replicates)')
points(c(1:4),psi2,col='red',pch=20,cex=2)
par(mar=c(3,3,2,0))
@
<<>>=
#Mean number of subjects who were treated sub-optimally
mean(improved)
@
\end{document}