#------------------------------------------------------------------- # 1.1 SIMPLE REGRESSION USING SPLUS #------------------------------------------------------------------- x<-c(6,6.3,6.5,6.8,7,7.1,7.5,7.5,7.6) y<-c(39,58,49,53,80,86,115,124,104) par(mfrow=c(2,2)) plot(x,y,main="Figure 1.1 Simple regression") glm.out<-glm(y~x) glm.out summary(glm.out) lines(x,glm.out$fitted.values) glm(y~1) r<-(7612-1013.765)/7612 r tstat<-50.41952/7.469724 tstat tstat^2/(7+tstat^2) #------------------------------------------------------------------- # 1.2 MAYER'S ESTIMATE OF THE LIBRATION OF THE MOON #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/moon.dat"),byrow=T,ncol=5) m y<- -m[,1]-m[,2]/60 y alpha<-m[,3] asin<-m[,4] grp<-m[,5] glm.out<-glm(y~alpha+asin) glm.out summary(glm.out) x<-cbind(rep(1,27),alpha,asin) x beta.ls<-solve(t(x) %*% x, t(x) %*% y) beta.ls g<- cbind(grp==1,grp==2,grp==3)*1 g beta.m<-solve(t(g) %*% x, t(g) %*% y) beta.m sum((y-x %*% beta.m)^2) summary(glm.out)$cov.unscaled * summary(glm.out)$dispersion a<-solve(t(g) %*% x, t(g)) a %*% t(a) * summary(glm.out)$dispersion plot(asin,alpha,type="n",main="Figure 1.2 Mayer's groups") for(i in 1:3) text(asin[grp==i],alpha[grp==i],i) #------------------------------------------------------------------- # 1.3 MULTIPLE REGRESSION #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/peru"),byrow=T,ncol=10) m age<-m[,1] mig<-m[,2] wt<-m[,3] ht<-m[,4] chin<-m[,5] arm<-m[,6] calf<-m[,7] pulse<-m[,8] sys<-m[,9] dias<-m[,10] plot(mig,sys,main="Figure 1.3 Peru, effect of mig") summary(glm(sys~mig)) pt(-0.5341708,37) f<-(6531.436-6481.452 )/175.1744 c(f,1-pf(f,1,37)) -0.5341708^2 0.2982093*2 plot(wt,sys,main="Figure 1.4 Peru, effect of wt") summary(glm(sys~wt)) summary(glm(sys~wt+mig)) (4756.056-3783.157)/105.0877 -3.042691^2 pt(-3.042691,36) f<-(6531.436-3783.157)/2/105.0877 c(f,1-pf(f,2,36)) plot(wt,mig,main="Figure 1.5 mig vs wt") rmig<-glm(mig~wt)$residuals rsys<-glm(sys~wt)$residuals plot(rmig,rsys,main="Figure 1.6 Peru") summary(glm(rsys~rmig-1)) 3783.157/36 plot(mig,sys,type="n",main="Figure 1.7 Point label = wt/10") wf<-floor(wt/10); wf for(i in 5:8) text(mig[wf==i],sys[wf==i],i) summary(glm(sys~ht+mig)) plot(ht,mig,main="Figure 1.8 mig vs ht") summary(glm(sys~ht+mig+wt+chin+arm+calf+age)) f<-(3783.157-3269.668)/5/105.4732 c(f,1-pf(f,5,31)) #------------------------------------------------------------------- # 2.1 FACTORS AND INTERACTION #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/educexp"),ncol=5,byrow=T) m urb<-m[,1] exp<-m[,2] inc<-m[,3] n18<-m[,4] reg<-m[,5] plot(reg,exp,main="Figure 2.1 Education") r1<-(reg==1)*1 r2<-(reg==2)*1 r3<-(reg==3)*1 r4<-(reg==4)*1 cbind(reg,r1,r2,r3,r4) summary(glm(exp~1)) summary(glm(exp~r1+r2+r3+r4)) f<-(113615.5-84430.74)/3/1918.88 c(f,1-pf(f,3,44)) summary(glm(exp~r1+r2+r3+r4-1)) for(i in 1:4) print(c(i,mean(exp[reg==i]))) Reg<-factor(reg) options(contrasts=c("contr.treatment", "contr.poly")) summary(glm(exp~Reg)) summary(glm(exp~r2+r3+r4)) plot(inc,exp,type="n",main="Figure 2.2 No interaction") for(i in 1:4) text(inc[reg==i],exp[reg==i],i) summary(glm(exp~inc)) glm1<-glm(exp~inc+Reg) summary(glm1) f<-(70359.97-57509.02)/3/1337.419 c(f,1-pf(f,3,43)) for(i in 1:4) lines(inc[reg==i],glm1$fitted.values[reg==i]) ir1<-inc*r1; ir2<-inc*r2; ir3<-inc*r3; ir4<-inc*r4 cbind(reg,inc,ir1,ir2,ir3,ir4) glm2<-glm(exp~inc+Reg+ir1+ir2+ir3+ir4) summary(glm2) f<-(57509.02-48839.48)/3/1220.987 c(f,1-pf(f,3,40)) plot(inc,exp,type="n",main="Figure 2.3 With interaction") for(i in 1:4) { text(inc[reg==i],exp[reg==i],i) lines(inc[reg==i],glm2$fitted.values[reg==i]) } summary(glm(exp~inc+Reg+inc:Reg)) summary(glm(exp~Reg+inc:Reg-1)) summary(glm(exp~inc,weight=r1)) summary(glm(exp~inc+urb+n18)) summary(glm(exp~inc+urb+n18+Reg)) f<-(57324.58-51858.94)/3/1264.852 c(f,1-pf(f,3,41)) summary(glm(exp~inc+urb+n18+Reg+(inc+urb+n18):Reg)) f<-(51858.94-37926.18)/9/1185.193 c(f,1-pf(f,9,32)) #------------------------------------------------------------------- # 2.2 ORTHOGONALITY AND BALANCED DESIGNS #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/medcare"),ncol=3,byrow=T) m sat<-m[,1] com<-factor(m[,2]) worry<-factor(m[,3]) glm0<-glm(sat~1); summary(glm0) glmc<-glm(sat~com); summary(glmc) glm0$deviance-glmc$deviance glmw<-glm(sat~worry); summary(glmw) glmcw<-glm(sat~com+worry); summary(glmcw) glmw$deviance-glmcw$deviance summary(glm(sat~com+worry+com:worry)) summary(glm(sat~com:worry-1)) m<-matrix(scan("/glim/datasets/medcareb"),ncol=3,byrow=T) m sat<-m[,1] com<-factor(m[,2]) worry<-factor(m[,3]) glm0<-glm(sat~1); summary(glm0) glmc<-glm(sat~com); summary(glmc) glm0$deviance-glmc$deviance glmw<-glm(sat~worry); summary(glmw) glmcw<-glm(sat~com+worry); summary(glmcw) glmw$deviance-glmcw$deviance m<-matrix(scan("/glim/datasets/glue"),ncol=3,byrow=T) m stren<-m[,1] temp<-m[,2] humid<-m[,3] glm0<-glm(stren~1); summary(glm0) glmt<-glm(stren~temp); summary(glmt) glm0$deviance-glmt$deviance glmh<-glm(stren~humid); summary(glmh) glmht<-glm(stren~humid+temp); summary(glmht) glmh$deviance-glmht$deviance xstren<-c(190,189,190,188,196,192,195,197,203,203,203,204) xtemp<-c(rep(80,6),rep(90,6)) cbind(xstren,xtemp,humid) glm0<-glm(xstren~1); summary(glm0) glmt<-glm(xstren~xtemp); summary(glmt) glm0$deviance-glmt$deviance glmh<-glm(xstren~humid); summary(glmh) glmht<-glm(xstren~humid+xtemp); summary(glmht) glmh$deviance-glmht$deviance #------------------------------------------------------------------- # 3.1 MODEL SELECTION #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/rents1"),ncol=4,byrow=T) m dist<-factor(m[,1]) room<-m[,2] rntc<-m[,3] rntv<-m[,4] par(mfrow=c(2,2)) plot(dist,rntc,main="Figure 3.1 Rent vs. district") plot(room,rntc,main="Figure 3.2 Rent vs. rooms") sse<-rep(0,10) p<-sse msep1<-sse msep2<-sse glm<-glm(rntc~1); glm sse[1]<-glm$deviance; p[1]<-length(glm$coefficients) msep1[1]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[1]<-sum((glm$residuals/(1-vl/sc))^2) glm<-glm(rntc~room); glm sse[2]<-glm$deviance; p[2]<-length(glm$coefficients) msep1[2]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[2]<-sum((glm$residuals/(1-vl/sc))^2) glm<-glm(rntc~dist); glm sse[3]<-glm$deviance; p[3]<-length(glm$coefficients) msep1[3]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[3]<-sum((glm$residuals/(1-vl/sc))^2) glm<-glm(rntc~dist+room); glm sse[4]<-glm$deviance; p[4]<-length(glm$coefficients) msep1[4]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[4]<-sum((glm$residuals/(1-vl/sc))^2) room2<-room^2 glm<-glm(rntc~dist+room+room2); glm sse[5]<-glm$deviance; p[5]<-length(glm$coefficients) msep1[5]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[5]<-sum((glm$residuals/(1-vl/sc))^2) Room<-factor(room) glm<-glm(rntc~dist+room+room2+Room); glm sse[6]<-glm$deviance; p[6]<-length(glm$coefficients) msep1[6]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[6]<-sum((glm$residuals/(1-vl/sc))^2) glm<-glm(rntc~dist+room+dist:room); glm sse[7]<-glm$deviance; p[7]<-length(glm$coefficients) msep1[7]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[7]<-sum((glm$residuals/(1-vl/sc))^2) glm<-glm(rntc~dist+Room+dist:room); glm sse[8]<-glm$deviance; p[8]<-length(glm$coefficients) msep1[8]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[8]<-sum((glm$residuals/(1-vl/sc))^2) glm<-glm(rntc~dist+Room+dist:room+dist:room2); glm sse[9]<-glm$deviance; p[9]<-length(glm$coefficients) msep1[9]<-sum((rntv-glm$fitted.values)^2) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion msep2[9]<-sum((glm$residuals/(1-vl/sc))^2) sse[10]<-0; p[10]<-length(rntc) msep1[10]<-sum((rntv-rntc)^2) msep2[10]<-NA msep3<-sse+2*p*sc cbind(p,sse,msep1,msep2,msep3) m<-matrix(scan("/glim/datasets/rents2"),ncol=3,byrow=T) dist<-factor(m[,1]) room<-m[,2] rent<-m[,3] w<-c(rep(1,129),rep(0,129)) glm<-glm(rent~dist+room,weight=w) glm cbind(rent,glm$fitted.values,glm$residuals) msep<-sum((1-w)*glm$residuals^2) msep #------------------------------------------------------------------- # 3.2 STEIN SHRINKAGE: PREDICTING HOCKEY SCORES #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/nhl8586"),ncol=2,byrow=T) m ycal<-m[,1] yval<-m[,2] plot(ycal,yval,main="Figure 3.3 Hockey") abline(0,1) msep<-sum((ycal-yval)^2) msep glm<-glm(yval~ycal) glm abline(coef(glm),lty=2) mean(ycal) abline(80,0,lty=3) msep<-sum((80-yval)^2) msep m<-matrix(scan("/glim/datasets/nhl82_84"),ncol=3,byrow=T) m y1<-m[,1] y2<-m[,2] y3<-m[,3] ym<-(y1+y2+y3)/3 sigma2<-sum((y1-ym)^2+(y2-ym)^2+(y3-ym)^2)/(2*20) sigma2 f<-sum((ycal-80)^2)/20/sigma2 f k<-1-1/f*18/20*40/42 k yshrink<-(1-k)*80+k*ycal abline((1-k)*80,k,lty=4) legend(47,120,c("Calib","Regres","80","Shrink"),lty=1:4) cbind(ycal,yshrink,yval) msep<-sum((yshrink-yval)^2) msep #------------------------------------------------------------------- # 4.1 RESIDUALS #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/peru"),byrow=T,ncol=10) mig<-m[,2] wt<-m[,3] sys<-m[,9] glm<-glm(sys~mig+wt) summary(glm) cbind(sys,fitted(glm),resid(glm))[1:4,] d1<-c(1,rep(0,38)) glm<-glm(sys~mig+wt+d1) summary(glm) cbind(sys,fitted(glm),resid(glm))[1:4,] glm<-glm(sys~mig+wt,weight=1-d1) summary(glm) cbind(sys,fitted(glm),resid(glm))[1:4,] glm<-glm(sys~mig+wt) vl<-predict.glm(glm,se.fit=T)$se.fit^2 sc<-summary(glm)$dispersion r<-resid(glm) z<-r/sqrt(sc-vl) 24.113181/(1-vl[1]/sc) df<-glm$df.residual tstat<-z/sqrt((df-z^2)/(df-1)) cbind(r,z,tstat) max(abs(tstat)) pval<-pt(-max(abs(tstat)),df-1)*2 c(pval,pval*39) -qt(0.05/2/39,df-1) par(mfrow=c(2,2)) plot(fitted(glm),r,main="Figure 4.1 Peru residuals vs fitted") abline(0,0) hist(z,main="Figure 4.2 Peru normalized residuals, z") u<-pnorm(z) hist(u,main="Figure 4.3 Peru uniforms, u") i<-1:39 uhat<-(i-0.5)/39 u<-sort(u) plot(uhat,u,xaxs="s",yaxs="s",main="Figure 4.4 Peru P-P plot") abline(0,1) zhat<-qnorm(uhat) z<-sort(z) plot(zhat,z,main="Figure 4.5 Peru Q-Q plot") abline(0,1) k<-sqrt(39)*(max(u-uhat)+0.5/39) k pk<-2*exp(-2*k^2) pk pk-2*exp(-2*4*k^2) #------------------------------------------------------------------- # 4.2 TRANSFORMS OF DATA #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/trees"),ncol=3,byrow=T) m diam<-m[,1] ht<-m[,2] vol<-m[,3] glm<-glm(vol~diam+ht) summary(glm) plot(ht,resid(glm),main="Figure 4.6 Trees residuals vs ht") abline(0,0) plot(diam,resid(glm),main="Figure 4.7 Trees residuals vs diam") abline(0,0) d2<-diam^2 glm<-glm(vol~ht+diam+d2) summary(glm) plot(diam,resid(glm),main="Figure 4.8 Residuals from quadratic in diam") abline(0,0) lvol<-log(vol) lht<-log(ht) ldiam<-log(diam) glm<-glm(lvol~lht+ldiam) summary(glm) plot(fitted(glm),resid(glm),main="Figure 4.9 Residuals from log model") abline(0,0) sum((vol-exp(fitted(glm)))^2) #------------------------------------------------------------------- # 4.3 ITERATIVELY RE-WEIGHTED LEAST SQUARES #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/accident"),ncol=9,byrow=T) acc<-m[,1] tmax<-m[,2] tmin<-m[,3] tmean<-m[,4] tcond<-m[,5] sun<-m[,6] rain<-m[,7] snow<-m[,8] days<-m[,9] summary(glm(acc~tmax+tmin+tmean+tcond+sun+rain+snow+days)) glm<-glm(acc~sun) summary(glm) plot(fitted(glm),resid(glm),main="Figure 4.10 Accidents residuals") abline(0,0) x<-(37:72) sd<-sqrt(x) lines(x,sd,lty=2) lines(x,-sd,lty=2) glm1<-glm(acc~sun,weight=1/acc) summary(glm1) glm2<-glm(acc~sun,weight=1/fitted(glm1)) summary(glm2) glm3<-glm(acc~sun,weight=1/fitted(glm2)) summary(glm3) wr<-(acc-fitted(glm3))/sqrt(fitted(glm2)) sum(wr^2) plot(fitted(glm3),wr,main="Figure 4.11 Accidents weighted residuals") abline(0,0) abline(1,0,lty=2) abline(-1,0,lty=2) glm<-glm(acc~sun,family=poisson(link=identity)) summary(glm) x2<-sum((acc-fitted(glm))^2*glm$weights) x2 d<-2*sum(acc*log(acc/fitted(glm))+fitted(glm)-acc) d cbind(acc,fitted(glm),resid(glm),wr)[1:4,] glm<-glm(acc~1,family=poisson(link=identity)) summary(glm) #------------------------------------------------------------------- # 6. LOG LINEAR MODELS FOR POISSON DATA #------------------------------------------------------------------- cars<-c(294,276,238,192) glm<-glm(cars~1,family=poisson) summary(glm) sum((cars-fitted(glm))^2/fitted(glm)) cbind(cars,fitted(glm),resid(glm)) lane<-1:4 par(mfrow=c(2,2)) plot(lane,cars,main="Figure 6.1 Cars vs. lane") glm<-glm(cars~lane,family=poisson) summary(glm) sum((cars-fitted(glm))^2/fitted(glm)) cbind(cars,fitted(glm),resid(glm)) lines(lane,fitted(glm)) lane<-factor(lane) options(contrasts=c("contr.treatment", "contr.poly")) glm<-glm(cars~lane,family=poisson) summary(glm) sum((cars-fitted(glm))^2/fitted(glm)) cbind(cars,fitted(glm),resid(glm)) x<-c(1997,906,904,32) p0<-c(0.5,0.25,0.25,0) p1<-c(0.25,-0.25,-0.25,0.25) ex0<-p0*sum(x) theta<-p1*sum(x) cbind(x,ex0,theta) glm<-glm(x~-1+theta,family=poisson(link=identity),offset=ex0) summary(glm) sum((x-fitted(glm))^2/fitted(glm)) cbind(x,fitted(glm),resid(glm)) defect<-c(15,21,45,13, 26,31,34, 5, 33,17,49,20) type<-factor(rep(1:4,3)) shift<-factor(rep(1:3,each=4)) cbind(type,shift,defect) glm<-glm(defect~type+shift,family=poisson) summary(glm) sum((defect-fitted(glm))^2/fitted(glm)) cbind(defect,fitted(glm),resid(glm)) wife<-factor(c(1,1,1,2,2,2,2,3,3,3,4,4,5,5,5,5,5)) husb<-factor(c(2,3,5,1,3,4,5,2,4,5,1,5,1,2,3,4,5)) marr<-c(5,17,6,5,0,16,2,2,10,11,10,9,6,20,8,0,1) cbind(wife,husb,marr) glm<-glm(marr~wife+husb,family=poisson) summary(glm) sum((marr-fitted(glm))^2/fitted(glm)) cbind(marr,fitted(glm),resid(glm)) y<-c(2,5,4,28,35,39,38,29,22,13,9,5)*10 opin<-rep(1:4,each=3) sond<-rep(1:3,4) Opin<-factor(opin) Sond<-factor(sond) glm<-glm(y~Opin+Sond,family=poisson) summary(glm) sum((y-fitted(glm))^2/fitted(glm)) cbind(y,fitted(glm),resid(glm)) glm(y~Opin+Sond+Opin:Sond,family=poisson) x<-opin*sond glm<-glm(y~Opin+Sond+x,family=poisson) summary(glm) sum((y-fitted(glm))^2/fitted(glm)) cbind(y,fitted(glm),resid(glm)) inf<-c(3,176,4,293,17,197,2,23) clin<-factor(rep(1:2,each=4)) care<-factor(rep(1:2,each=2,length.out=8)) surv<-factor(rep(1:2,length.out=8)) cbind(inf,clin,care,surv) glm(inf~care+surv,family=poisson) glm(inf~care+surv+care:surv,family=poisson) glm(inf~care+surv+clin,family=poisson) glm(inf~care+surv+clin+care:surv,family=poisson) glm(inf~care+surv+clin+care:clin,family=poisson) glm(inf~care+surv+clin+care:clin+care:surv,family=poisson) glm(inf~care+surv+clin+care:clin+surv:clin,family=poisson) glm(inf~care+surv+clin+care:clin+surv:clin+care:surv,family=poisson) glm<-glm(inf~care+surv+clin+care:clin+surv:clin,family=poisson) summary(glm) cbind(inf,fitted(glm),resid(glm)) #------------------------------------------------------------------- # 7. LOGISTIC REGRESSION #------------------------------------------------------------------- #------------------------------------------------------------------- # KYPHOSIS #------------------------------------------------------------------- y<-(kyphosis$Kyphosis=="present")*1 age<-kyphosis$Age cbind(y,age) par(mfrow=c(2,2)) plot(age,y,yaxs="s",main="Figure 7.1 Kyphosis") summary(glm(y~1,family=binomial)) glm1<-glm(y~age,family=binomial) summary(glm1) i<-order(age) i lines(age[i],fitted(glm1)[i]) a<-seq(-500,1000,100) eta<- -1.809076886 + 0.005439848 *a phat<-exp(eta)/(1+exp(eta)) plot(a,phat,yaxs="s",type="l",main="Figure 7.2 Kyphosis enlarged") points(age,y) plot(age,y,yaxs="s",main="Figure 7.3 Quadratic in age") age2<-age^2 glm2<-glm(y~age+age2,family=binomial) summary(glm2) lines(age[i],fitted(glm2)[i]) #------------------------------------------------------------------- # 245-T #------------------------------------------------------------------- misc<-c(10,17,18,11,16,24,20,17,9,15,16,15) totf<-c(222,221,188,183,197,218,208,219,198,216,244,218) herb<-c(0,0,0,0,1454,3280,788,0,304,560,0,0) p<-misc/totf cbind(month.name,herb,misc,totf,p) plot(herb,p,main="Figure 7.4 245-T") summary(glm(p~1,family=binomial,weight=totf)) glm<-glm(p~herb,family=binomial,weight=totf) summary(glm) cbind(p,fitted(glm),resid(glm)) i<-order(herb) lines(herb[i],fitted(glm)[i]) h<-seq(-5000,35000,1000) eta<- -2.6206479509+ 0.0001629353 *h phat<-exp(eta)/(1+exp(eta)) plot(h,phat,yaxs="s",type="l",main="Figure 7.5 245-T enlarged") points(herb,p) #------------------------------------------------------------------- # CLINICS (AGAIN) #------------------------------------------------------------------- died<-c(3,4,17,2) surv<-c(176,293,197,23) totc<-died+surv p<-died/totc clin<-factor(c(1,1,2,2)) care<-factor(c(1,2,1,2)) cbind(p,clin,care) options(contrasts = c("contr.treatment", "contr.poly")) summary(glm(p~1,family=binomial,weight=totc)) summary(glm(p~care,family=binomial,weight=totc)) summary(glm(p~clin,family=binomial,weight=totc)) summary(glm(p~care+clin,family=binomial,weight=totc)) glm<-glm(p~clin,family=binomial,weight=totc) cbind(p*totc,fitted(glm)*totc,resid(glm)) #------------------------------------------------------------------- # 245-T (AGAIN) #------------------------------------------------------------------- freq<-c(misc,totf-misc) herbb<-c(herb,herb) month<-factor(rep(1:12,2)) surv<-factor(rep(1:2,each=12)) cbind(month,surv,freq,herbb) summary(glm(freq~month+surv+surv:herbb,family=poisson)) #------------------------------------------------------------------- # BIRDS #------------------------------------------------------------------- cal<-c( 36.9, 41.1, 35.8, 40.6, 34.6, 38.9, 34.3, 37.9, 32.8, 37.0, 31.7, 36.1, 31.0, 36.3, 29.8, 34.2, 29.1, 33.4, 28.2, 32.8, 27.4, 32.0, 27.8, 31.9, 25.5, 30.7, 24.9, 29.5, 23.7, 28.5, 23.1, 27.7) spec<-factor(rep(1:2,16)) temp<-rep((0:15)*2,each=2) plot(temp,cal,type="n",main="Figure 7.6 Birds") points(temp[spec==1],cal[spec==1],pch="A") points(temp[spec==2],cal[spec==2],pch="B") glm<-glm(cal~spec-1+temp:spec) summary(glm) abline(coef(glm)[c(1,3)]) abline(coef(glm)[c(2,4)]) glm<-glm(cal~temp*spec) summary(glm) plot(temp,resid(glm),type="n",main="Figure 7.7 Birds residuals") points(temp[spec==1],resid(glm)[spec==1],pch="A") points(temp[spec==2],resid(glm)[spec==2],pch="B") Temp<-factor(temp) glm<-glm(cal~Temp+temp*spec) summary(glm) diff<-cal[spec==2]-cal[spec==1] tem<-(0:15)*2 plot(tem,diff,main="Figure 7.8 Birds differences") summary(glm(diff~tem)) #------------------------------------------------------------------- # ESKIMO #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/eskimo"),ncol=2,byrow=T) pres<-m[,1] abs<-m[,2] tot<-pres+abs p<-pres/tot age<-rep(1:6,6) plot(age,p,yaxs="s",main="Figure 7.9 Eskimo") glm<-glm(p~age,family=binomial,weight=tot) summary(glm) lines(age[1:6],fitted(glm)[1:6]) a2<-age^2 glm<-glm(p~age+a2,family=binomial,weight=tot) summary(glm) Age<-factor(age) glm<-glm(p~age+a2+Age,family=binomial,weight=tot) summary(glm) sex<-factor(rep(1:2,each=6,length.out=36)) pop<-factor(rep(1:3,each=12,length.out=36)) glm(p~age+sex,family=binomial,weight=tot) glm(p~age+pop,family=binomial,weight=tot) glm(p~age+sex+pop,family=binomial,weight=tot) glm(p~age+pop+age:pop,family=binomial,weight=tot) glm(p~age+sex+pop+age:pop,family=binomial,weight=tot) glm(p~age+sex+pop+age:sex+age:pop,family=binomial,weight=tot) glm(p~age+sex+pop+age:sex+age:pop+sex:pop,family=binomial,weight=tot) glm(p~age+sex+pop+age:sex+age:pop+sex:pop+age:sex:pop,family=binomial,weight=tot) glm<-glm(p~age+pop+age:pop,family=binomial,weight=tot) summary(glm) glm<-glm(p~pop-1+age:pop,family=binomial,weight=tot) summary(glm) plot(age,p,yaxs="s",type="n",main="Figure 7.10 Eskimo") points(age[pop==1],p[pop==1],pch="1") points(age[pop==2],p[pop==2],pch="2") points(age[pop==3],p[pop==3],pch="3") lines(age[1:6],fitted(glm)[1:6],lty=2) lines(age[13:18],fitted(glm)[13:18],lty=3) lines(age[25:30],fitted(glm)[25:30],lty=4) #------------------------------------------------------------------- # 8. GAMMA FAMILY AND CENSORED SURVIVAL DATA #------------------------------------------------------------------- #------------------------------------------------------------------- # INSULATE #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/insulate"),ncol=2,byrow=T) kv<-m[,1] time<-m[,2] par(mfrow=c(2,2)) plot(kv,log(time),main="Figure 8.1 Insulate") summary(glm(time~1,family=Gamma(link=log))) glm<-glm(time~kv,family=Gamma(link=log)) summary(glm) abline(21.4118567 ,-0.5553653 ) chisq<-(371.6379 -131.6428 )/1.734156 c(chisq,1-pchisq(chisq,1)) kv2<-kv^2 summary(glm(time~kv+kv2,family=Gamma(link=log))) chisq<-(131.6428 -131.4256 )/1.715972 c(chisq,1-pchisq(chisq,1)) u<-1-exp(-time/fitted(glm)) hist(u, main="Figure 8.2 Uniform?") u<-sort(u) uhat<-((1:76)-0.5)/76 plot(uhat,u,xaxs="s",yaxs="s", main="Figure 8.3 P-P plot") abline(0,1) k<-sqrt(76)*(max(abs(u-uhat))+0.5/76) k #------------------------------------------------------------------- # USING THE INVERSE LIKELIHOOD #------------------------------------------------------------------- ystar<-1/time summary(glm(ystar~1,family=poisson, weight=time)) summary(glm(ystar~kv,family=poisson, weight=time)) #------------------------------------------------------------------- # CENSORED EXPONENTIAL DATA #------------------------------------------------------------------- censor<-(time<10)*1.0 censor.time<-censor*time+(1-censor)*10 censor.time plot(kv,log(time),main="Figure 8.4 Censored data",pch=".") points(kv,log(censor.time)) abline(log(10),0,lty=2) abline(21.4118567 ,-0.5553653 ) ystar<-censor/censor.time summary(glm(ystar~1,family=poisson, weight=censor.time)) summary(glm(ystar~kv,family=poisson, weight=censor.time)) abline(17.5702038 , -0.4556724 ,lty=3) #------------------------------------------------------------------- # LEUKEMIA #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/leukemx"),ncol=3,byrow=T) sample<-factor(m[,1]) weeks<-m[,2] censor<-m[,3] rate<-censor/weeks summary(glm(rate~1,family=poisson,weight=weeks)) summary(glm(rate~sample,family=poisson,weight=weeks)) summary(glm(rate~1,family=binomial(link=cloglog),weight=weeks)) summary(glm(rate~sample,family=binomial(link=cloglog),weight=weeks)) #------------------------------------------------------------------- # AML #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/aml1"),ncol=13,byrow=T) age<-m[,1] surv <-m[,2] censor <-m[,3] rig <-m[,4] rigg <-m[,5] rigm <-m[,6] riga <-m[,7] aig <-m[,8] aigg <-m[,9] aigm <-m[,10] aiga <-m[,11] wbc <-m[,12] lim <-m[,13] rate<-censor/surv summary(glm(rate~1,family=poisson,weight=surv)) summary(glm(rate~age+rig+rigg+rigm+riga+aig+aigg+aigm+aiga+wbc+lim,family=poisson,weight=surv)) summary(glm(rate~age+rig+aig+wbc+lim,family=poisson,weight=surv)) summary(glm(rate~age+rig+wbc+lim,family=poisson,weight=surv)) summary(glm(rate~age+aig+wbc+lim,family=poisson,weight=surv)) summary(glm(rate~age+wbc+lim,family=poisson,weight=surv)) summary(glm(rate~wbc+lim,family=poisson,weight=surv)) #------------------------------------------------------------------- # MYELOMA #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/myeloma"),ncol=7,byrow=T) censor<-m[,1] time<-m[,2] urea<-m[,3] hemo<-m[,4] age<-m[,5] sex<-factor(m[,6]) calcium<-m[,7] rate<-censor/time options(contrasts = c("contr.treatment", "contr.poly")) summary(glm(rate~urea+hemo+age+sex+calcium,family=binomial(link=cloglog),weight=time)) summary(glm(rate~urea,family=binomial(link=cloglog),weight=time)) #------------------------------------------------------------------- # 9.1 CHOOSING THE FAMILY #------------------------------------------------------------------- snow<-c(4.1, 12.2, 0, 14.6, 2.5, 2.1, 4.1, 11.5, 0.7, 0, 23.3, 6.3, 0, 0, 0, 3.1, 2.6, 1.0, 0.3, 0, 0, 0, 2.3, 11.3, 6.5, 0, 4.8, 1.1, 0, 17.4, 0, 10.9, 1.3, 0, 0.8, 0, 1.0, 18.4, 0, 0, 0, 3.8, 0, 7.0, 31.0, 0, 6.6, 0, 16.9, 0, 0.3, 10.1, 0, 5.0, 1.0) ear<-6:60 par(mfrow=c(2,2)) plot(year,snow,yaxs="s",main="Figure 9.1 Snow") poissonexp<-list( family=c(name="poissonexp",link="log",variance="mu^(3/2)"), names="log", link=function(mu) log(mu), inverse=function(eta) care.exp(eta), deriv=function(mu) 1/mu, initialize=expression({y <- as.numeric(y) mu <- y+0.167*(y==0)}), variance=function(mu) mu^(3/2), deviance=function(mu, y, w, residuals = F) if(residuals) sqrt(w)*(sqrt(y)-sqrt(mu))*2/sqrt(sqrt(mu)) else sum(w*(sqrt(y)-sqrt(mu))^2*4/sqrt(mu)), weight=expression(w/( (sqrt(family$variance(mu)) * family$deriv(mu))^2) ) ) class(poissonexp)<-"family" #print.default(poissonexp) summary(glm(snow~1,family=poissonexp)) summary(glm(snow~year,family=poissonexp)) theta<-2*pi*year/11 sine<-sin(theta) cosine<-cos(theta) glm<-glm(snow~sine+cosine,family=poissonexp) summary(glm) lines(year,fitted(glm)) #------------------------------------------------------------------- # 9.1 CHOOSING THE LINK #------------------------------------------------------------------- m<-matrix(scan("/glim/datasets/trees"), ncol=3,byrow=T) diam<-m[,1] height<-m[,2] vol<-m[,3] glm(vol~diam+height ) 4.708161/ 0.3392512 glm(vol~diam+height,family=quasi(variance=constant,link=log) ) 0.1341617 /0.0111431 glm(vol~diam+height,family=quasi(variance=constant,link=sqrt) ) 0.4106364 /0.03913296 vol fvol<-floor(vol/5)-1 fvol plot(height,diam,type="n",main="Figure 9.2 Trees") for(i in 1:14) text(height[fvol==i],diam[fvol==i],i) Fvol<-factor(fvol) glm<-glm(vol~diam+height+Fvol) glm cov<-summary(glm)$cov.unscaled[2:3,2:3] cov evec<-eigen(cov) evec -0.99717411 /-0.07512518 eta<-13.2735*diam+height plot(vol,eta,main="Figure 9.3 Estimated link function")