\documentclass[notitlepage]{article} \usepackage{Math533} \usepackage{listings} \usepackage{numprint} \usepackage{amsfonts} \usepackage{amsmath} \usepackage{amssymb} \usepackage{bbm} \usepackage{bbold} \lstloadlanguages{R} \parindent0in \lstset{basicstyle=\ttfamily, numbers=none, literate={~} {$\sim$}{2}} \newcommand{\smallG}{\textnormal{\tiny \text{G}}} \def\Ind{\mathbb{1}} \begin{document} \begin{center} \textsclarge{Models without an Intercept} \end{center} Consider the balanced two factor design: \begin{itemize} \item Factor $A$: 3 levels, indexed $j=0,1,2$; \item Factor $B$: 5 levels, indexed $l=0,1,2,3,4$; \item $n_{jl} = 4$ replicate observations for each factor level combination; \item total sample size $n=3\times 5 \times 4 = 60$. \end{itemize} Data can be simulated as follows: <>= set.seed(2332) nk<-4; m.A<-3; m.B<-5 n<-nk*m.A*m.B A<-rep(gl(m.A,m.B,labels=c(0:(m.A-1))),nk) B<-rep(gl(m.B,1,length=m.A*m.B,labels=c(0:(m.B-1))),nk) beta.A<-c(0,2,-1) beta.B<-c(1,1,2,3,1) beta.AB<-matrix(0,nrow=m.A,ncol=m.B) beta.AB[2,2:5]<-0.1 beta.AB[3,2:5]<--0.1 Y<-beta.A[as.numeric(A)]+beta.B[as.numeric(B)]+diag(beta.AB[as.numeric(A),as.numeric(B)])+rnorm(n)*2 table(A,B) @ Consider first the model $A$ that fits the intercept plus the $A$ factor. This model supposes that the three groups of 20 subjects, defined by the three levels of factor $A$, each have a different mean. The mean model is \[ \Expect[Y_i|\bx_i] = \beta_0 + \sum_{j=1}^2 \beta_{Aj}^\smallC \Ind_{j}(A_i) = \left\{ \begin{array}{ll} \beta_0 & A=0 \\[6pt] \beta_0 + \beta_{A1}^\smallC & A_i=1 \\[6pt] \beta_0 + \beta_{A2}^\smallC & A_i=2 \end{array} \right. \] respectively. We define the \textbf{\text{group}} means by the usual reparameterization: \[ \beta_0^\smallG = \beta_0 \qquad \beta_1^\smallG = \beta_0 + \beta_{A1}^\smallC \qquad \beta_2^\smallG = \beta_0 + \beta_{A2}^\smallC \] <>= fit1<-lm(Y~A); summary(fit1) @ which reveals the estimates $\widehat \beta_0 = \Sexpr{round(coef(fit1)[1],4)}, \widehat \beta_{A1}^\smallC = \Sexpr{round(coef(fit1)[2],4)}, \widehat \beta_{A2}^\smallC = \Sexpr{round(coef(fit1)[3],4)}$ and group mean estimates $\widehat \beta_0^\smallG = \Sexpr{round(coef(fit1)[1],4)}$, \[ \widehat \beta_1^\smallG = \Sexpr{round(coef(fit1)[1],4)} + \Sexpr{round(coef(fit1)[2],4)} = \Sexpr{round(sum(coef(fit1)[1:2]),4)} \qquad \widehat \beta_2^\smallG = \Sexpr{round(coef(fit1)[1],4)} \Sexpr{round(coef(fit1)[3],4)} = \Sexpr{round(sum(coef(fit1)[c(1,3)]),4)}. \] Using the formula \texttt{Y$\sim$\:-1+A}, we may reparameterize directly. <>= fit2<-lm(Y~-1+A); summary(fit2) @ This produces the same estimates of group means $\beta_k^\smallG, k=0,1,2$. The two models yield the same $\SSRes$ quantity, as expected. <>= anova(fit1) anova(fit2) @ However, the ANOVA tables are \textbf{different}, as are the $R^2$ quantities. This is because the analysis with the intercept \textbf{included} uses the decomposition \[ \SST = \SSRes + \SSR \] and the model with the intercept \textbf{excluded} uses the decomposition \[ \SSTover = \SSRes + \SSRover \] where \[ \SST = \sum_{i=1}^n y_i^2 - n (\overline{y})^2 = \SSTover - n (\overline{y})^2. \] and $\SSRover = \SSR + n (\overline{y})^2$. Thus the difference between the \texttt{Sum Sq} quantities is $n\overline{y}^2=\Sexpr{n*mean(Y)^2}$. \[ n (\overline{y})^2 = 270.15 - 50.00 = 220.15 \] The $R^2$ values for the models with and without intercepts are \[ \frac{\SSR}{\SST} = \frac{\SSR}{\SSRes+\SSR} = \frac{50.00}{50.00+326.81} = 0.13269 \quad \text{and} \quad \frac{\SSRover}{\SSTover} = \frac{\SSRover}{\SSRes+\SSRover} = \frac{270.15}{270.15+326.81} = 0.45254 \] respectively. We now consider the model $A+B$ with and without the intercept: <>= fit3<-lm(Y~A+B);summary(fit3) fit4<-lm(Y~-1+A+B);summary(fit4) @ The parameter estimates for the levels of factor $A$ change in the same way as before; the contrast parameter estimates do not change. <>= tapply(Y,INDEX=list(A=A,B=B),mean) #Data means by group tapply(fitted(fit3),INDEX=list(A=A,B=B),mean) #Fitted means with intercept tapply(fitted(fit4),INDEX=list(A=A,B=B),mean) #Fitted means without intercept @ In an interaction model: <>= fit5<-lm(Y~A*B); summary(fit5) fit6<-lm(Y~-1+A*B); summary(fit6) @ The fitted values from this model recover the group means exactly: <>= tapply(Y,INDEX=list(A=A,B=B),mean) tapply(fitted(fit6),INDEX=list(A=A,B=B),mean) @ <>= anova(fit3,fit5) anova(fit5) @ \end{document}