\documentclass[notitlepage]{article} \usepackage{Math598} \usepackage{listings} \usepackage{numprint} \usepackage{enumerate} \usepackage{bbm} \usepackage{amsfonts,amsmath} \def\E{\Expect} \lstloadlanguages{R} \definecolor{keywordcolor}{rgb}{0,0.6,0.6} \definecolor{delimcolor}{rgb}{0.461,0.039,0.102} \definecolor{Rcommentcolor}{rgb}{0.101,0.043,0.432} \lstdefinestyle{Rsettings}{ basicstyle=\ttfamily, breaklines=true, showstringspaces=false, keywords={if, else, function, theFunction, tmp}, % Write as many keywords otherkeywords={}, commentstyle=\itshape\color{Rcommentcolor}, keywordstyle=\color{keywordcolor}, moredelim=[s][\color{delimcolor}]{"}{"}, } \lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}} \begin{document} \begin{center} {\textsclarge{Simpson's Paradox with Continuous variables}} \end{center} <>= library(MASS) set.seed(2111) #Set the random number generator seed value n<-10000 #Set the sample size X<-rnorm(n,10,5) #Generate the X random variables Sig.YZ<-matrix(c(1,-0.9,-0.9,1),2,2) #Conditional variance-covariance for Y,Z given X YZ<-mvrnorm(n,mu=c(0,0),Sigma=Sig.YZ) #Generate the Y,Z variables Y<-X+YZ[,1];Z<-X+YZ[,2] #Change the mean according to X par(mar=c(4,4,1,0),pty='s') #Set up the plotting margins plot(Z,Y,pch=19,cex=0.8) @ <>= Y1<-Y[X>4.5 & X < 5.5];Z1<-Z[X>4.5 & X < 5.5]; #First subset analysis Y2<-Y[X>9.5 & X < 10.5];Z2<-Z[X>9.5 & X < 10.5]; #Second subset analysis Y3<-Y[X>14.5 & X < 15.5];Z3<-Z[X>14.5 & X < 15.5]; #Third subset analysis par(mar=c(4,2,1,0),pty='s',mfrow=c(2,2)) #Set up the plotting margins plot(Z1,Y1,pch=19,cex=0.8) plot(Z2,Y2,pch=19,cex=0.8) plot(Z3,Y3,pch=19,cex=0.8) @ <>= coef(summary(lm(Y1~Z1))) #Group 1 regression coef(summary(lm(Y2~Z2))) #Group 2 regression coef(summary(lm(Y3~Z3))) #Group 3 regression coef(summary(lm(c(Y1,Y2,Y3)~c(Z1,Z2,Z3)))) #Pooled regression @ <>= par(mar=c(4,4,1,0),pty='s') plot(Z,Y,type='n') points(Z1,Y1,pch=19,cex=0.8,col='red') points(Z2,Y2,pch=19,cex=0.8,col='blue') points(Z3,Y3,pch=19,cex=0.8,col='green') abline(lm(c(Y1,Y2,Y3)~c(Z1,Z2,Z3)),lty=2) abline(lm(Y1~Z1),col='red') abline(lm(Y2~Z2),col='blue') abline(lm(Y3~Z3),col='green') legend(10,max(Y),c('Group 0','Group 1','Group 2','Pooled'), col=c('red','blue','green','black'),lty=c(1,1,1,2)) cor(cbind(X,Y,Z)) @ \end{document}