\documentclass[notitlepage]{article}

% Course style file. NOTE(review): \Expect and \textsclarge used below are
% presumably defined here -- confirm against Math598.sty.
\usepackage{Math598}
\usepackage{listings}
\usepackage{numprint}
\usepackage{enumerate}
\usepackage{bbm}
\usepackage{amsfonts,amsmath}
\usepackage{tikz}
\usetikzlibrary{shapes,decorations,arrows,calc,arrows.meta,fit,positioning}

% Shorthand \E for the expectation operator \Expect.
\def\E{\Expect}

% ---- listings set-up for R code ---------------------------------------------
\lstloadlanguages{R}
\definecolor{keywordcolor}{rgb}{0,0.6,0.6}
\definecolor{delimcolor}{rgb}{0.461,0.039,0.102}
\definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432}
% Each key sits on its own line so that the trailing %-comment on the
% `keywords' line ends at the line break instead of swallowing later keys.
\lstdefinestyle{Rsettings}{
  basicstyle=\ttfamily,
  breaklines=true,
  showstringspaces=false,
  keywords={if, else, function, theFunction, tmp}, % Write as many keywords
  otherkeywords={},
  commentstyle=\itshape\color{Rcommentcolor},
  keywordstyle=\color{keywordcolor},
  moredelim=[s][\color{delimcolor}]{"}{"},
}
% Global listings defaults; a literal ~ in listings is typeset as $\sim$.
\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\parindent0in

\begin{document}

\begin{center}
  {\textsclarge{Conditioning on a Collider}}
\end{center}

% ---- TikZ styles shared by the graphs in this document ----------------------
\tikzset{
  -Latex,auto,node distance =1 cm and 1 cm,semithick,
  state/.style ={circle, draw, minimum width = 0.7 cm},
  box/.style ={rectangle, draw, minimum width = 0.7 cm, fill=lightgray},
  point/.style = {circle, draw, inner sep=0.08cm,fill,node contents={}},
  bidirected/.style={Latex-Latex,dashed},
  el/.style = {inner sep=3pt, align=left, sloped}
}

The graph
\begin{figure}[ht]
  \centering
  \begin{tikzpicture}[scale=1.5]
    % x node set with absolute coordinates
    \node[state] (x) at (-1,0) {${X}$};
    \node[state] (y) at (1,0){${Y}$};
    \node[state] (z) at (0,-1) {${Z}$};
    \node[state] (u) at (0,0) {${U_Z}$};
    % Z is the collider: X, Y and U_Z each have an edge into Z.
    \path (x) edge (z);
    \path (y) edge (z);
    \path (u) edge (z);
  \end{tikzpicture}
\end{figure}
encodes the dependencies between the four random variables $(X,Y,Z,U_Z)$: the joint density can be represented as
\[
  f_{X,Y,Z,U_Z}(x,y,z,u) = f_X(x) f_Y(y) f_{U_Z}(u) f_{Z|X,Y,U_Z}(z|x,y,u),
\]
that is, $X,Y,U_Z$ are mutually independent.
Suppose that $X$ and $Y$ are distributed as standard Normal random variables, $U_Z \sim \text{Normal}(0,0.1^2)$, and
\[
  Z = X + Y + U_Z.
\]

% Simulate (X, Y, U_Z, Z) and draw the pairwise scatterplots.
% NOTE(review): the chunk header is restored as an option-free knitr/Sweave
% header; if the original carried chunk options they need to be re-added.
<<>>=
set.seed(2101)          #Set the random number generator seed value
n<-10000                #Set the sample size
X<-rnorm(n,0,1)         #Generate the X random variables
Y<-rnorm(n,0,1)         #Generate the Y random variables
UZ<-rnorm(n,0,0.1)      #Generate the UZ random variables
Z<-X+Y+UZ               #Z is the sum of its three parents
par(mar=c(3,2,1,0))     #Set up the plotting margins
pairs(cbind(X,Y,Z),pch=19,cex=0.7)
@

If we condition on the value of $Z$, and inspect the joint density of $X$ and $Y$ given $Z$, we see that $X$ and $Y$ are conditionally \textbf{dependent}.

% Condition on Z by restricting to three bands of Z values, then plot and
% correlate X against Y within each band.
<<>>=
#Logical masks for the three bands of Z; each is computed once and reused
band1<-Z > -2.5 & Z < -1.5
band2<-Z > -0.5 & Z < 0.5
band3<-Z > 0.5 & Z < 1.5
X1<-X[band1]; Y1<-Y[band1]   #First subset analysis
X2<-X[band2]; Y2<-Y[band2]   #Second subset analysis
X3<-X[band3]; Y3<-Y[band3]   #Third subset analysis
#Set up the plotting margins, square plot regions and a 2 x 2 layout
par(mar=c(4,3,1,0),pty='s',mfrow=c(2,2))
plot(X1,Y1,pch=19,cex=0.7)
plot(X2,Y2,pch=19,cex=0.7)
plot(X3,Y3,pch=19,cex=0.7)
#Sample correlation of X and Y within each band
cor(X1,Y1)
cor(X2,Y2)
cor(X3,Y3)
@

\end{document}