\documentclass[notitlepage]{article}
\usepackage{Math533}
\usepackage{listings}
\usepackage{numprint}
\usepackage{amsfonts}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{bbm}

\lstloadlanguages{R}

\parindent0in

\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\begin{document}


\begin{center}
\textsclarge{Factor Predictors}
\end{center}

Consider the following data set in \texttt{R}:  from the \texttt{R} help file

\begin{quote}
\emph{The Moore data frame has 45 rows and 4 columns. The data are for subjects in a social-psychological experiment, who were faced with manipulated disagreement from a partner of either of low or high status. The subjects could either conform to the partner's judgment or stick with their own judgment.}
\end{quote}

<<fac1,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
library(car)  #Need to install this library first
data(Moore)   #See help(Moore)
str(Moore)
@

There are two factors: \texttt{partner.status} with two levels (\texttt{high}, \texttt{low}) and \texttt{fcategory} with three levels (\texttt{high}, \texttt{low}, \texttt{medium}).  There is also a continuous covariate \texttt{conformity}.  The response variable is \texttt{fscore}.  The number of observations in the cross-categories are
<<fac2,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
table(Moore$partner.status,Moore$fcategory)
par(mar=c(4,2,2,0))
boxplot(fscore ~ partner.status * fcategory, data=Moore,
        ylab="fscore", main="Boxplots of fscore",col=rep(c('red','blue'),3))
@
The red boxes correspond to the partner.status level \texttt{high}.

First we fit the two single factor models; first the model which may be written
\[
\texttt{1+partner.status}
\]
that uses partner status only.
<<fac3,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
fit1.only<-lm(fscore~partner.status,data=Moore);summary(fit1.only)
@
This result implies that partner status has no influence on outcome.  The two levels of \texttt{partner.status} give two parameter estimates: the baseline group is factor level \texttt{high} (\texttt{R} chooses the baseline by alphabetical ordering), and the estimated contrast between \texttt{high} and \texttt{low} is
\[
\beta_\text{low}^\smallC = \Sexpr{round(coef(fit1.only)[2],3)}
\]
For the second factor:
<<fac4,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
fit2.only<-lm(fscore~fcategory,data=Moore);summary(fit2.only)
@
This result implies that \texttt{fcategory} does have an influence on outcome.  The three levels of \texttt{fcategory} give three parameter estimates: the baseline group is factor level \texttt{high}, and the estimated contrasts between \texttt{high} and \texttt{low}, and \texttt{high} and \texttt{medium} are
\[
\beta_\text{low}^\smallC = \Sexpr{format(coef(fit2.only)[2],nsmall=3)}  \qquad \beta_\text{medium}^\smallC = \Sexpr{format(round(coef(fit2.only)[3],3),nsmall=3)}
\]
To use different baseline group, say \texttt{low}, use the following commands, in particular, the function \texttt{relevel()}:
<<fac5,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
Moore2 <- within(Moore,  fcategory<- relevel(fcategory, ref = 'low'))
fit2.only2<-lm(fscore~fcategory,data=Moore2);summary(fit2.only2)
@
Notice that many of the details of the fit do not change.

\medskip
We now consider the ``main effects only'' model
\[
\texttt{1+partner.status+fcategory}
\]
<<fac6,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
fit3.add<-lm(fscore~partner.status+fcategory,data=Moore);summary(fit3.add)
@
We can display the fitted modelled means using the function \texttt{lsmeans}:
<<fac7,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
library(lsmeans)   #Make sure this package is loaded
lsmeans(fit3.add, ~ partner.status * fcategory)
Moore.fit<-Moore;Moore.fit$fit<-fitted(fit3.add)
aggregate(fit~partner.status+fcategory,data=Moore.fit,mean)    #Check
@
For the ANOVA using partial F-tests:
<<fac8,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
anova(fit3.add)
@
For a residual plot:
<<fac9,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
par(mar=c(4,4,2,4))
stripchart(residuals(fit3.add)~partner.status+fcategory,data=Moore,
           pch=19,cex=0.8,ylim=range(-15,15),vertical=TRUE)
abline(h=0,lty=2)
@


\medskip
Finally we now consider the ``main effects plus interaction'' model
\[
\texttt{1+partner.status+fcategory+partner.status:fcategory}
\]
<<fac10,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
fit4.add<-lm(fscore~partner.status*fcategory,data=Moore);summary(fit4.add)
@
We can display the fitted modelled means using the function \texttt{lsmeans}:
<<fac11,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
lsmeans(fit4.add, ~ partner.status * fcategory)
@
For the ANOVA using partial F-tests:
<<fac12,comment=':', fig.width=5, fig.height=5, fig.align='center'>>=
anova(fit4.add)
@
For a residual plot:
<<fac13,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
par(mar=c(4,4,2,4))
stripchart(residuals(fit4.add)~partner.status+fcategory,data=Moore,
           pch=19,cex=0.8,ylim=range(-15,15),vertical=TRUE)
abline(h=0,lty=2)
@
\end{document}