> #------------------------------------------------------------------- > # 4.1 RESIDUALS > #------------------------------------------------------------------- > > m<-matrix(scan("/glim/datasets/peru"),byrow=T,ncol=10) Read 390 items > mig<-m[,2] > wt<-m[,3] > sys<-m[,9] > > glm<-glm(sys~mig+wt) > summary(glm) Call: glm(formula = sys ~ mig + wt) Deviance Residuals: Min 1Q Median 3Q Max -17.469 -7.878 1.076 6.292 24.113 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 50.3191 15.8184 3.181 0.00302 ** mig -0.5718 0.1879 -3.043 0.00436 ** wt 1.3541 0.2672 5.067 1.22e-05 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 105.0877) Null deviance: 6531.4 on 38 degrees of freedom Residual deviance: 3783.2 on 36 degrees of freedom AIC: 297.09 Number of Fisher Scoring iterations: 2 > cbind(sys,fitted(glm),resid(glm))[1:4,] sys 1 170 145.8868 24.113181 2 120 123.3935 -3.393464 3 125 123.2883 1.711730 4 148 132.3460 15.653961 > d1<-c(1,rep(0,38)) > > glm<-glm(sys~mig+wt+d1) > summary(glm) Call: glm(formula = sys ~ mig + wt + d1) Deviance Residuals: Min 1Q Median 3Q Max -17.746 -6.996 1.225 6.019 18.466 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 62.5528 15.0874 4.146 0.000204 *** mig -0.3829 0.1842 -2.078 0.045074 * wt 1.1043 0.2596 4.254 0.000149 *** d1 29.4230 10.3517 2.842 0.007423 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 87.81926) Null deviance: 6531.4 on 38 degrees of freedom Residual deviance: 3073.7 on 35 degrees of freedom AIC: 290.99 Number of Fisher Scoring iterations: 2 > cbind(sys,fitted(glm),resid(glm))[1:4,] sys 1 170 170.0000 5.684342e-14 2 120 122.6499 -2.649870e+00 3 125 122.4806 2.519415e+00 4 148 129.5337 1.846629e+01 > > glm<-glm(sys~mig+wt,weight=1-d1) > summary(glm) Call: glm(formula = sys ~ mig + wt, weights = 1 - d1) Deviance Residuals: Min 1Q Median 3Q Max -17.746 -6.996 1.225 6.019 18.466 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 62.5528 15.0874 4.146 0.000204 *** mig -0.3829 0.1842 -2.078 0.045074 * wt 1.1043 0.2596 4.254 0.000149 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 87.81926) Null deviance: 4669.8 on 37 degrees of freedom Residual deviance: 3073.7 on 35 degrees of freedom AIC: 282.77 Number of Fisher Scoring iterations: 2 Warning message: observations with zero weight not used for calculating dispersion in: summary.glm(glm) > cbind(sys,fitted(glm),resid(glm))[1:4,] sys 1 170 140.5770 0.000000 2 120 122.6499 -2.649870 3 125 122.4806 2.519415 4 148 129.5337 18.466287 > > glm<-glm(sys~mig+wt) > vl<-predict.lm(glm,se.fit=T)$se.fit^2 > sc<-summary(glm)$dispersion > r<-resid(glm) > z<-r/sqrt(sc-vl) > 24.113181/(1-vl[1]/sc) [1] 29.42304 > df<-glm$df.residual > tstat<-z/sqrt((df-z^2)/(df-1)) > cbind(r,z,tstat) r z tstat 1 24.1131807 2.59833475 2.84234089 2 -3.3934636 -0.34047128 -0.33625101 3 1.7117305 0.17225197 0.16991276 4 15.6539607 1.59087468 1.62685131 5 2.2376487 0.23048158 0.22742576 6 -17.4069080 -1.72847137 -1.77974488 7 0.7739645 0.07880631 0.07771077 8 0.2108637 0.02270212 0.02238476 9 -10.9031265 -1.09762077 -1.10084580 10 13.9324122 1.39257278 1.41164300 11 -16.9313287 -1.68300525 -1.72887330 12 -10.6266865 -1.05555489 -1.05728112 13 1.5975562 0.15791037 0.15575568 14 -16.1343379 -1.61226289 -1.65041321 15 2.1635717 0.22071694 0.21777724 16 14.0376062 1.40485804 1.42481549 17 1.0761021 0.10782296 0.10633204 18 4.3561031 0.44209389 0.43709859 19 -3.7802079 -0.38024616 -0.37568296 20 0.8627993 0.08590256 0.08470975 21 -6.5935580 -0.65775702 -0.65248980 22 9.3724103 0.92820759 0.92637741 23 4.7887223 0.47814669 0.47296321 24 7.8301329 0.79497753 0.79083077 25 -8.7966190 -0.89862509 -0.89616436 26 -5.8069127 -0.59120675 -0.58578837 27 -8.6528772 -0.87264957 -0.86969166 28 3.9026086 0.40201303 0.39728296 29 0.6915533 0.06953107 0.06856316 30 -7.5342907 -0.75413875 -0.74953495 31 10.5456159 1.05914415 1.06099167 32 3.7011489 0.37704908 0.37251167 33 -13.2616686 -1.31727213 -1.33132922 34 -17.4691419 -1.73063700 -1.78217696 35 5.8345451 0.58680211 0.58138180 36 7.8838277 0.78195228 0.77764769 37 -8.2215714 -0.83742389 -0.83387295 38 11.4847473 1.28249774 1.29447716 39 6.7498865 0.81629865 0.81243534 > max(abs(tstat)) [1] 2.842341 > pval<-pt(-max(abs(tstat)),df-1)*2 > c(pval,pval*39) [1] 0.007422829 0.289490335 > -qt(0.05/2/39,df-1) [1] 3.501906 > > par(mfrow=c(2,2)) > plot(fitted(glm),r,main="Figure 4.1 Peru residuals vs fitted") > abline(0,0) > > hist(z,main="Figure 4.2 Peru normalized residuals, z") > > u<-pnorm(z) > hist(u,main="Figure 4.3 Peru uniforms, u") > > i<-1:39 > uhat<-(i-0.5)/39 > u<-sort(u) > plot(uhat,u,xaxs="i",xlim=c(0,1),yaxs="i",ylim=c(0,1),main="Figure 4.4 Peru P-P plot") > abline(0,1) > zhat<-qnorm(uhat) > z<-sort(z) > plot(zhat,z,main="Figure 4.5 Peru Q-Q plot") > abline(0,1) > > k<-sqrt(39)*(max(u-uhat)+0.5/39) > k [1] 0.7771318 > pk<-2*exp(-2*k^2) > pk [1] 0.5976677 > pk-2*exp(-2*4*k^2) [1] 0.5817181 > > #------------------------------------------------------------------- > # 4.2 TRANSFORMS OF DATA > #------------------------------------------------------------------- > > m<-matrix(scan("/glim/datasets/trees"),ncol=3,byrow=T) Read 93 items > m [,1] [,2] [,3] [1,] 8.3 70 10.3 [2,] 8.6 65 10.3 [3,] 8.8 63 10.2 [4,] 10.5 72 16.4 [5,] 10.7 81 18.8 [6,] 10.8 83 19.7 [7,] 11.0 66 15.6 [8,] 11.0 75 18.2 [9,] 11.1 80 22.6 [10,] 11.2 75 19.9 [11,] 11.3 79 24.2 [12,] 11.4 76 21.0 [13,] 11.4 76 21.4 [14,] 11.7 69 21.3 [15,] 12.0 75 19.1 [16,] 12.9 74 22.2 [17,] 12.9 85 33.8 [18,] 13.3 86 27.4 [19,] 13.7 71 25.7 [20,] 13.8 64 24.9 [21,] 14.0 78 34.5 [22,] 14.2 80 31.7 [23,] 14.5 74 36.3 [24,] 16.0 72 38.3 [25,] 16.3 77 42.6 [26,] 17.3 81 55.4 [27,] 17.5 82 55.7 [28,] 17.9 80 58.3 [29,] 18.0 80 51.5 [30,] 18.0 80 51.0 [31,] 20.6 87 77.0 > diam<-m[,1] > ht<-m[,2] > vol<-m[,3] > glm<-glm(vol~diam+ht) > summary(glm) Call: glm(formula = vol ~ diam + ht) Deviance Residuals: Min 1Q Median 3Q Max -6.4065 -2.6493 -0.2876 2.2003 8.4847 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -57.9877 8.6382 -6.713 2.75e-07 *** diam 4.7082 0.2643 17.816 < 2e-16 *** ht 0.3393 0.1302 2.607 0.0145 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 15.06862) Null deviance: 8106.08 on 30 degrees of freedom Residual deviance: 421.92 on 28 degrees of freedom AIC: 176.91 Number of Fisher Scoring iterations: 2 > plot(ht,resid(glm),main="Figure 4.6 Trees residuals vs ht") > abline(0,0) > plot(diam,resid(glm),main="Figure 4.7 Trees residuals vs diam") > abline(0,0) > d2<-diam^2 > glm<-glm(vol~ht+diam+d2) > summary(glm) Call: glm(formula = vol ~ ht + diam + d2) Deviance Residuals: Min 1Q Median 3Q Max -4.2928 -1.6693 -0.1018 1.7851 4.3489 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -9.92041 10.07911 -0.984 0.333729 ht 0.37639 0.08823 4.266 0.000218 *** diam -2.88508 1.30985 -2.203 0.036343 * d2 0.26862 0.04590 5.852 3.13e-06 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 6.889327) Null deviance: 8106.08 on 30 degrees of freedom Residual deviance: 186.01 on 27 degrees of freedom AIC: 153.52 Number of Fisher Scoring iterations: 2 > plot(diam,resid(glm),main="Figure 4.8 Residuals from quadratic in diam") > abline(0,0) > lvol<-log(vol) > lht<-log(ht) > ldiam<-log(diam) > glm<-glm(lvol~lht+ldiam) > summary(glm) Call: glm(formula = lvol ~ lht + ldiam) Deviance Residuals: Min 1Q Median 3Q Max -0.168561 -0.048488 0.002431 0.063637 0.129223 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -6.63162 0.79979 -8.292 5.06e-09 *** lht 1.11712 0.20444 5.464 7.81e-06 *** ldiam 1.98265 0.07501 26.432 < 2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 0.006623692) Null deviance: 8.30869 on 30 degrees of freedom Residual deviance: 0.18546 on 28 degrees of freedom AIC: -62.711 Number of Fisher Scoring iterations: 2 > plot(fitted(glm),resid(glm),main="Figure 4.9 Residuals from log model") > abline(0,0) > sum((vol-exp(fitted(glm)))^2) [1] 180.8260 > > #------------------------------------------------------------------- > # 4.3 ITERATIVELY RE-WEIGHTED LEAST SQUARES > #------------------------------------------------------------------- > > m<-matrix(scan("/glim/datasets/accident"),ncol=9,byrow=T) Read 324 items > acc<-m[,1] > tmax<-m[,2] > tmin<-m[,3] > tmean<-m[,4] > tcond<-m[,5] > sun<-m[,6] > rain<-m[,7] > snow<-m[,8] > days<-m[,9] > summary(glm(acc~tmax+tmin+tmean+tcond+sun+rain+snow+days)) Call: glm(formula = acc ~ tmax + tmin + tmean + tcond + sun + rain + snow + days) Deviance Residuals: Min 1Q Median 3Q Max -21.50348 -7.71303 -0.07034 6.86113 28.31641 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 70.96651 87.89237 0.807 0.4265 tmax 2.93284 8.76705 0.335 0.7406 tmin -2.16064 10.77910 -0.200 0.8426 tmean -5.84881 18.09550 -0.323 0.7490 tcond 4.80028 2.36833 2.027 0.0527 . sun -0.03285 0.06736 -0.488 0.6297 rain 0.04848 0.07896 0.614 0.5443 snow 0.04642 0.17336 0.268 0.7909 days 0.06666 3.00649 0.022 0.9825 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 159.2134) Null deviance: 8452.0 on 35 degrees of freedom Residual deviance: 4298.8 on 27 degrees of freedom AIC: 294.34 Number of Fisher Scoring iterations: 2 > glm<-glm(acc~sun) > summary(glm) Call: glm(formula = acc ~ sun) Deviance Residuals: Min 1Q Median 3Q Max -25.916 -7.282 -1.368 6.727 37.013 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 80.1033 5.4674 14.651 2.97e-16 *** sun -0.1299 0.0301 -4.317 0.000129 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 160.5767) Null deviance: 8452.0 on 35 degrees of freedom Residual deviance: 5459.6 on 34 degrees of freedom AIC: 288.94 Number of Fisher Scoring iterations: 2 > plot(fitted(glm),resid(glm),main="Figure 4.10 Accidents residuals") > abline(0,0) > x<-(37:72) > sd<-sqrt(x) > lines(x,sd,lty=2) > lines(x,-sd,lty=2) > > glm1<-glm(acc~sun,weight=1/acc) > summary(glm1) Call: glm(formula = acc ~ sun, weights = 1/acc) Deviance Residuals: Min 1Q Median 3Q Max -3.7277 -0.6499 0.1485 1.1079 4.3022 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 78.2198 5.4747 14.287 6.22e-16 *** sun -0.1341 0.0282 -4.756 3.55e-05 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 2.735646) Null deviance: 154.893 on 35 degrees of freedom Residual deviance: 93.012 on 34 degrees of freedom AIC: 11.181 Number of Fisher Scoring iterations: 2 > > glm2<-glm(acc~sun,weight=1/fitted(glm1)) > summary(glm2) Call: glm(formula = acc ~ sun, weights = 1/fitted(glm1)) Deviance Residuals: Min 1Q Median 3Q Max -3.2225 -0.9080 -0.1652 0.8716 5.4110 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 79.46785 5.70734 13.924 1.32e-15 *** sun -0.12613 0.02928 -4.307 0.000133 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 3.037198) Null deviance: 159.61 on 35 degrees of freedom Residual deviance: 103.26 on 34 degrees of freedom AIC: 11.255 Number of Fisher Scoring iterations: 2 > > glm3<-glm(acc~sun,weight=1/fitted(glm2)) > summary(glm3) Call: glm(formula = acc ~ sun, weights = 1/fitted(glm2)) Deviance Residuals: Min 1Q Median 3Q Max -3.1735 -0.8941 -0.1640 0.8540 5.2396 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 79.54745 5.68196 14.000 1.13e-15 *** sun -0.12661 0.02939 -4.308 0.000133 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for gaussian family taken to be 2.877384) Null deviance: 151.244 on 35 degrees of freedom Residual deviance: 97.831 on 34 degrees of freedom AIC: 10.988 Number of Fisher Scoring iterations: 2 > > wr<-(acc-fitted(glm3))/sqrt(fitted(glm2)) > sum(wr^2) [1] 97.83106 > plot(fitted(glm3),wr,main="Figure 4.11 Accidents weighted residuals") > abline(0,0) > abline(1,0,lty=2) > abline(-1,0,lty=2) > > glm<-glm(acc~sun,family=poisson(link=identity)) > summary(glm) Call: glm(formula = acc ~ sun, family = poisson(link = identity)) Deviance Residuals: Min 1Q Median 3Q Max -3.4234 -0.9117 -0.1647 0.8394 4.7345 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) 79.54744 3.34965 23.748 < 2e-16 *** sun -0.12661 0.01732 -7.308 2.70e-13 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 146.86 on 35 degrees of freedom Residual deviance: 95.30 on 34 degrees of freedom AIC: 310.64 Number of Fisher Scoring iterations: 3 > x2<-sum((acc-fitted(glm))^2*glm$weights) > x2 [1] 97.83113 > d<-2*sum(acc*log(acc/fitted(glm))+fitted(glm)-acc) > d [1] 95.29953 > cbind(acc,fitted(glm),resid(glm),wr)[1:4,] acc wr 1 65 65.72215 -0.08924256 -0.08909772 2 64 67.31738 -0.40771682 -0.40442743 3 65 61.41758 0.45278110 0.45716330 4 57 53.26420 0.50606189 0.51178596 > > glm<-glm(acc~1,family=poisson(link=identity)) > summary(glm) Call: glm(formula = acc ~ 1, family = poisson(link = identity)) Deviance Residuals: Min 1Q Median 3Q Max -4.7589 -1.3962 -0.1095 1.0143 3.6099 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) 58.333 1.273 45.83 <2e-16 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 146.86 on 35 degrees of freedom Residual deviance: 146.86 on 35 degrees of freedom AIC: 360.2 Number of Fisher Scoring iterations: 3