#------------------------------------------------------------------- # 4.1 RESIDUALS #------------------------------------------------------------------- > m <- matrix(scan("/glim/datasets/peru"), byrow = T, ncol = 10) > mig <- m[, 2] > wt <- m[, 3] > sys <- m[, 9] > glm <- glm(sys ~ mig + wt) > summary(glm) Coefficients: Value Std. Error t value (Intercept) 50.319127 15.8183899 3.181052 mig -0.571845 0.1879405 -3.042691 wt 1.354078 0.2672237 5.067208 (Dispersion Parameter for Gaussian family taken to be 105.0877 ) Residual Deviance: 3783.157 on 36 degrees of freedom > cbind(sys, fitted(glm), resid(glm))[1:4, ] sys 1 170 145.8868 24.113181 2 120 123.3935 -3.393464 3 125 123.2883 1.711730 4 148 132.3460 15.653961 > d1 <- c(1, rep(0, 38)) > glm <- glm(sys ~ mig + wt + d1) > summary(glm) Coefficients: Value Std. Error t value (Intercept) 62.5528138 15.0873861 4.146034 mig -0.3828768 0.1842212 -2.078353 wt 1.1043242 0.2596061 4.253845 d1 29.4230445 10.3516945 2.842341 (Dispersion Parameter for Gaussian family taken to be 87.81926 ) Residual Deviance: 3073.674 on 35 degrees of freedom > cbind(sys, fitted(glm), resid(glm))[1:4, ] sys 1 170 170.0000 0.000000 2 120 122.6499 -2.649870 3 125 122.4806 2.519415 4 148 129.5337 18.466287 > glm <- glm(sys ~ mig + wt, weight = 1 - d1) > summary(glm) Warning messages: Warning in summary.glm(glm): 1 rows with zero weights not counted Coefficients: Value Std. Error t value (Intercept) 62.5528138 15.0873861 4.146034 mig -0.3828768 0.1842212 -2.078353 wt 1.1043242 0.2596061 4.253845 (Dispersion Parameter for Gaussian family taken to be 87.81926 ) Residual Deviance: 3073.674 on 35 degrees of freedom > cbind(sys, fitted(glm), resid(glm))[1:4, ] sys 1 170 140.5770 0.000000 2 120 122.6499 -2.649870 3 125 122.4806 2.519415 4 148 129.5337 18.466287 > glm <- glm(sys ~ mig + wt) > vl <- predict.glm(glm, se.fit = T)$se.fit^2 > sc <- summary(glm)$dispersion > r <- resid(glm) > z <- r/sqrt(sc - vl) > 24.113181/(1 - vl[1]/sc) 1 29.42304 > df <- glm$df.residual > tstat <- z/sqrt((df - z^2)/(df - 1)) > cbind(r, z, tstat) r z tstat 1 24.1131807 2.59833475 2.84234089 2 -3.3934636 -0.34047128 -0.33625101 3 1.7117305 0.17225197 0.16991276 4 15.6539607 1.59087468 1.62685131 > par(mfrow = c(2, 2)) > plot(fitted(glm), r, main = "Figure 4.1 Peru residuals vs fitted") > abline(0, 0) > hist(z, main = "Figure 4.2 Peru normalized residuals, z") > u <- pnorm(z) > hist(u, main = "Figure 4.3 Peru uniforms, u") > i <- 1:39 > uhat <- (i - 0.5)/39 > u <- sort(u) > plot(uhat, u, xaxs = "s", yaxs = "s", main = "Figure 4.4 Peru P-P plot") > abline(0, 1) > zhat <- qnorm(uhat) > z <- sort(z) > plot(zhat, z, main = "Figure 4.5 Peru Q-Q plot") > abline(0, 1) > k <- sqrt(39) * (max(u - uhat) + 0.5/39) > k [1] 0.7771318 > pk <- 2 * exp(-2 * k^2) > pk [1] 0.5976677 > pk - 2 * exp(-2 * 4 * k^2) [1] 0.5817181 #------------------------------------------------------------------- # 4.2 TRANSFORMS OF DATA #------------------------------------------------------------------- > m <- matrix(scan("/glim/datasets/trees"), ncol = 3, byrow = T) > m [,1] [,2] [,3] [1,] 8.3 70 10.3 [2,] 8.6 65 10.3 [3,] 8.8 63 10.2 [4,] 10.5 72 16.4 [5,] 10.7 81 18.8 [6,] 10.8 83 19.7 [7,] 11.0 66 15.6 [8,] 11.0 75 18.2 [9,] 11.1 80 22.6 [10,] 11.2 75 19.9 [11,] 11.3 79 24.2 [12,] 11.4 76 21.0 [13,] 11.4 76 21.4 [14,] 11.7 69 21.3 [15,] 12.0 75 19.1 [16,] 12.9 74 22.2 [17,] 12.9 85 33.8 [18,] 13.3 86 27.4 [19,] 13.7 71 25.7 [20,] 13.8 64 24.9 [21,] 14.0 78 34.5 [22,] 14.2 80 31.7 [23,] 14.5 74 36.3 [24,] 16.0 72 38.3 [25,] 16.3 77 42.6 [26,] 17.3 81 55.4 [27,] 17.5 82 55.7 [28,] 17.9 80 58.3 [29,] 18.0 80 51.5 [30,] 18.0 80 51.0 [31,] 20.6 87 77.0 > diam <- m[, 1] > ht <- m[, 2] > vol <- m[, 3] > glm <- glm(vol ~ diam + ht) > summary(glm) Coefficients: Value Std. Error t value (Intercept) -57.9876589 8.6382259 -6.712913 diam 4.7081605 0.2642646 17.816084 ht 0.3392512 0.1301512 2.606594 (Dispersion Parameter for Gaussian family taken to be 15.06862 ) Residual Deviance: 421.9214 on 28 degrees of freedom > plot(ht, resid(glm), main = "Figure 4.6 Trees residuals vs ht") > abline(0, 0) > plot(diam, resid(glm), main = "Figure 4.7 Trees residuals vs diam") > abline(0, 0) > d2 <- diam^2 > glm <- glm(vol ~ ht + diam + d2) > summary(glm) Coefficients: Value Std. Error t value (Intercept) -9.9204060 10.07911393 -0.9842538 ht 0.3763873 0.08823199 4.2658826 diam -2.8850787 1.30985084 -2.2026010 d2 0.2686224 0.04590478 5.8517309 (Dispersion Parameter for Gaussian family taken to be 6.889327 ) Residual Deviance: 186.0118 on 27 degrees of freedom > plot(diam, resid(glm), main = "Figure 4.8 Residuals from quadratic in diam") > abline(0, 0) > lvol <- log(vol) > lht <- log(ht) > ldiam <- log(diam) > glm <- glm(lvol ~ lht + ldiam) > summary(glm) Coefficients: Value Std. Error t value (Intercept) -6.631617 0.79978973 -8.291701 lht 1.117123 0.20443706 5.464388 ldiam 1.982650 0.07501061 26.431592 (Dispersion Parameter for Gaussian family taken to be 0.0066237 ) Residual Deviance: 0.1854634 on 28 degrees of freedom > plot(fitted(glm), resid(glm), main = "Figure 4.9 Residuals from log model") > abline(0, 0) > sum((vol - exp(fitted(glm)))^2) [1] 180.826 #------------------------------------------------------------------- # 4.3 ITERATIVELY RE-WEIGHTED LEAST SQUARES #------------------------------------------------------------------- > m <- matrix(scan("/glim/datasets/accident"), ncol = 9, byrow = T) > acc <- m[, 1] > tmax <- m[, 2] > tmin <- m[, 3] > tmean <- m[, 4] > tcond <- m[, 5] > sun <- m[, 6] > rain <- m[, 7] > snow <- m[, 8] > days <- m[, 9] > summary(glm(acc ~ tmax + tmin + tmean + tcond + sun + rain + snow + days )) Coefficients: Value Std. Error t value (Intercept) 70.96650719 87.89237389 0.80742508 tmax 2.93283824 8.76705263 0.33452956 tmin -2.16064062 10.77910458 -0.20044713 tmean -5.84880788 18.09550369 -0.32321885 tcond 4.80027919 2.36832971 2.02686272 sun -0.03285072 0.06736008 -0.48768828 rain 0.04848438 0.07896030 0.61403488 snow 0.04642392 0.17336415 0.26778272 days 0.06666291 3.00649111 0.02217299 (Dispersion Parameter for Gaussian family taken to be 159.2134 ) Residual Deviance: 4298.761 on 27 degrees of freedom > glm <- glm(acc ~ sun) > summary(glm) Coefficients: Value Std. Error t value (Intercept) 80.1032887 5.46739284 14.65109 sun -0.1299225 0.03009652 -4.31686 (Dispersion Parameter for Gaussian family taken to be 160.5767 ) Residual Deviance: 5459.608 on 34 degrees of freedom > plot(fitted(glm), resid(glm), main = "Figure 4.10 Accidents residuals") > abline(0, 0) > x <- (37:72) > sd <- sqrt(x) > lines(x, sd, lty = 2) > lines(x, - sd, lty = 2) > glm1 <- glm(acc ~ sun, weight = 1/acc) > summary(glm1) Coefficients: Value Std. Error t value (Intercept) 78.2197552 5.47469880 14.287499 sun -0.1341008 0.02819568 -4.756078 (Dispersion Parameter for Gaussian family taken to be 2.735646 ) Residual Deviance: 93.01195 on 34 degrees of freedom > glm2 <- glm(acc ~ sun, weight = 1/fitted(glm1)) > summary(glm2) Coefficients: Value Std. Error t value (Intercept) 79.4678543 5.70733700 13.923806 sun -0.1261302 0.02928495 -4.306998 (Dispersion Parameter for Gaussian family taken to be 3.037198 ) Residual Deviance: 103.2647 on 34 degrees of freedom > glm3 <- glm(acc ~ sun, weight = 1/fitted(glm2)) > summary(glm3) Coefficients: Value Std. Error t value (Intercept) 79.5474479 5.68195750 14.000008 sun -0.1266052 0.02938507 -4.308488 (Dispersion Parameter for Gaussian family taken to be 2.877384 ) Residual Deviance: 97.83106 on 34 degrees of freedom > wr <- (acc - fitted(glm3))/sqrt(fitted(glm2)) > sum(wr^2) [1] 97.83106 > plot(fitted(glm3), wr, main = "Figure 4.11 Accidents weighted residuals") > abline(0, 0) > abline(1, 0, lty = 2) > abline(-1, 0, lty = 2) > glm <- glm(acc ~ sun, family = poisson(link = identity)) > summary(glm) Coefficients: Value Std. Error t value (Intercept) 79.5474479 3.34964706 23.748009 sun -0.1266052 0.01732319 -7.308426 (Dispersion Parameter for Poisson family taken to be 1 ) Residual Deviance: 95.29953 on 34 degrees of freedom Number of Fisher Scoring Iterations: 3 > x2 <- sum((acc - fitted(glm))^2 * glm$weights) > x2 [1] 97.83106 > d <- 2 * sum(acc * log(acc/fitted(glm)) + fitted(glm) - acc) > d [1] 95.29953 > cbind(acc, fitted(glm), resid(glm), wr)[1:4, ] acc wr 1 65 65.72216 -0.08924281 -0.08909772 2 64 67.31738 -0.40771712 -0.40442743 3 65 61.41758 0.45278099 0.45716330 4 57 53.26420 0.50606208 0.51178596 > glm <- glm(acc ~ 1, family = poisson(link = identity)) > summary(glm) Coefficients: Value Std. Error t value (Intercept) 58.33333 1.272938 45.82576 (Dispersion Parameter for Poisson family taken to be 1 ) Residual Deviance: 146.8585 on 35 degrees of freedom Number of Fisher Scoring Iterations: 0