\documentclass[notitlepage]{article}
\usepackage{Math533}
\usepackage{listings}
\usepackage{numprint}
\usepackage{amsfonts}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{bbm}
\usepackage{bbold}

\lstloadlanguages{R}

\parindent0in

\lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}}

\newcommand{\smallG}{\textnormal{\tiny \text{G}}}
\def\Ind{\mathbb{1}}

\begin{document}


\begin{center}
\textsclarge{Models without an Intercept}
\end{center}

Consider the balanced two factor design:

\begin{itemize}
\item Factor $A$: 3 levels, indexed $j=0,1,2$;

\item Factor $B$: 5 levels, indexed $l=0,1,2,3,4$;

\item $n_{jl} = 4$ replicate observations for each factor level combination;

\item total sample size $n=3\times 5 \times 4 = 60$.
\end{itemize}
Data can be simulated as follows:

<<tap1,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
set.seed(2332)
nk<-4; m.A<-3; m.B<-5
n<-nk*m.A*m.B
A<-rep(gl(m.A,m.B,labels=c(0:(m.A-1))),nk)
B<-rep(gl(m.B,1,length=m.A*m.B,labels=c(0:(m.B-1))),nk)
beta.A<-c(0,2,-1)
beta.B<-c(1,1,2,3,1)
beta.AB<-matrix(0,nrow=m.A,ncol=m.B)
beta.AB[2,2:5]<-0.1
beta.AB[3,2:5]<--0.1
Y<-beta.A[as.numeric(A)]+beta.B[as.numeric(B)]+diag(beta.AB[as.numeric(A),as.numeric(B)])+rnorm(n)*2
table(A,B)
@
Consider first the model $A$ that fits the intercept plus the $A$ factor.  This model supposes that the three groups of 20 subjects, defined by the three levels of factor $A$, each have a different mean. The mean model is
\[
\Expect[Y_i|\bx_i] = \beta_0 + \sum_{j=1}^2 \beta_{Aj}^\smallC \Ind_{j}(A_i) = \left\{
\begin{array}{ll}
  \beta_0 & A=0 \\[6pt]
  \beta_0 + \beta_{A1}^\smallC & A_i=1 \\[6pt]
  \beta_0 + \beta_{A2}^\smallC & A_i=2
\end{array}
\right.
\]
respectively.  We define the \textbf{\text{group}} means by the usual reparameterization:
\[
\beta_0^\smallG = \beta_0 \qquad  \beta_1^\smallG = \beta_0 + \beta_{A1}^\smallC \qquad \beta_2^\smallG = \beta_0 + \beta_{A2}^\smallC
\]
<<tap2,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
fit1<-lm(Y~A); summary(fit1)
@
which reveals the estimates $\widehat \beta_0 = \Sexpr{round(coef(fit1)[1],4)}, \widehat \beta_{A1}^\smallC = \Sexpr{round(coef(fit1)[2],4)}, \widehat \beta_{A2}^\smallC = \Sexpr{round(coef(fit1)[3],4)}$ and group mean estimates $\widehat \beta_0^\smallG = \Sexpr{round(coef(fit1)[1],4)}$,
\[
\widehat \beta_1^\smallG = \Sexpr{round(coef(fit1)[1],4)} + \Sexpr{round(coef(fit1)[2],4)} = \Sexpr{round(sum(coef(fit1)[1:2]),4)} \qquad \widehat \beta_2^\smallG = \Sexpr{round(coef(fit1)[1],4)} \Sexpr{round(coef(fit1)[3],4)} = \Sexpr{round(sum(coef(fit1)[c(1,3)]),4)}.
\]
Using the formula \texttt{Y$\sim$\:-1+A}, we may reparameterize directly.
<<tap3,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
fit2<-lm(Y~-1+A); summary(fit2)
@
This produces the same estimates of group means $\beta_k^\smallG, k=0,1,2$. The two models yield the same $\SSRes$ quantity, as expected.
<<tap4,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
anova(fit1)
anova(fit2)
@
However, the ANOVA tables are \textbf{different}, as are the $R^2$ quantities.  This is because the analysis with the intercept \textbf{included} uses the decomposition
\[
\SST = \SSRes + \SSR
\]
and the model with the intercept \textbf{excluded} uses the decomposition
\[
\SSTover = \SSRes + \SSRover
\]
where
\[
\SST = \sum_{i=1}^n y_i^2 - n (\overline{y})^2 = \SSTover - n (\overline{y})^2.
\]
and $\SSRover = \SSR + n (\overline{y})^2$. Thus the difference between the \texttt{Sum Sq} quantities is $n\overline{y}^2=\Sexpr{n*mean(Y)^2}$.
\[
n (\overline{y})^2 = 270.15 - 50.00 = 220.15
\]
The $R^2$ values for the models with and without intercepts are
\[
\frac{\SSR}{\SST} = \frac{\SSR}{\SSRes+\SSR} = \frac{50.00}{50.00+326.81} = 0.13269
\quad \text{and} \quad \frac{\SSRover}{\SSTover} = \frac{\SSRover}{\SSRes+\SSRover} = \frac{270.15}{270.15+326.81} = 0.45254
\]
respectively.  We now consider the model $A+B$ with and without the intercept:
<<tap6,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
fit3<-lm(Y~A+B);summary(fit3)
fit4<-lm(Y~-1+A+B);summary(fit4)
@
The parameter estimates for the levels of factor $A$ change in the same way as before; the contrast parameter estimates do not change.
<<tap7,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
tapply(Y,INDEX=list(A=A,B=B),mean)  #Data means by group
tapply(fitted(fit3),INDEX=list(A=A,B=B),mean) #Fitted means with intercept
tapply(fitted(fit4),INDEX=list(A=A,B=B),mean) #Fitted means without intercept
@
In an interaction model:
<<tap8,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
fit5<-lm(Y~A*B); summary(fit5)
fit6<-lm(Y~-1+A*B); summary(fit6)
@
The fitted values from this model recover the group means exactly:
<<tap9,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
tapply(Y,INDEX=list(A=A,B=B),mean)
tapply(fitted(fit6),INDEX=list(A=A,B=B),mean)
@
<<tap10,comment=':', fig.width=7.5, fig.height=4.5, fig.align='center'>>=
anova(fit3,fit5)
anova(fit5)
@

\end{document}