#------------------------------------------------------------------- # 1.1 SIMPLE REGRESSION USING SPLUS #------------------------------------------------------------------- > x <- c(6, 6.3, 6.5, 6.8, 7, 7.1, 7.5, 7.5, 7.6) > y <- c(39, 58, 49, 53, 80, 86, 115, 124, 104) > plot(x, y, main = "Figure 1.1 Simple regression") > glm.out <- glm(y ~ x) > glm.out Call: glm(formula = y ~ x) Coefficients: (Intercept) x -270.3485 50.41952 Degrees of Freedom: 9 Total; 7 Residual Residual Deviance: 1013.765 > summary(glm.out) Call: glm(formula = y ~ x) Deviance Residuals: Min 1Q Median 3Q Max -19.50428 -8.378425 -1.630137 7.202055 16.20205 Coefficients: Value Std. Error t value (Intercept) -270.34846 51.862458 -5.212797 x 50.41952 7.469724 6.749851 (Dispersion Parameter for Gaussian family taken to be 144.8236 ) Null Deviance: 7612 on 8 degrees of freedom Residual Deviance: 1013.765 on 7 degrees of freedom Number of Fisher Scoring Iterations: 1 Correlation of Coefficients: (Intercept) x -0.9970042 > lines(x, glm.out$fitted.values) > glm(y ~ 1) Call: glm(formula = y ~ 1) Coefficients: (Intercept) 78.66667 Degrees of Freedom: 9 Total; 8 Residual Residual Deviance: 7612 > r <- (7612 - 1013.765)/7612 > r [1] 0.8668202 > tstat <- 50.41952/7.469724 > tstat [1] 6.74985 > tstat^2/(7 + tstat^2) [1] 0.8668201 #------------------------------------------------------------------- # 1.2 MAYER'S ESTIMATE OF THE LIBRATION OF THE MOON #------------------------------------------------------------------- > m <- matrix(scan("/glim/datasets/moon.dat"), byrow = T, ncol = 5) > m [,1] [,2] [,3] [,4] [,5] [1,] 13 10 0.8836 -0.4682 1 [2,] 13 8 0.9996 -0.0282 1 [3,] 13 12 0.9899 0.1421 1 [4,] 14 15 0.2221 0.9750 3 [5,] 14 42 0.0006 1.0000 3 [6,] 13 1 0.9308 -0.3654 1 [7,] 14 31 0.0602 0.9982 3 [8,] 14 57 -0.1570 0.9876 2 [9,] 13 5 0.9097 -0.4152 1 [10,] 13 2 1.0000 0.0055 1 [11,] 13 12 0.9689 0.2476 1 [12,] 13 11 0.8878 0.4602 1 [13,] 13 34 0.7549 0.6558 3 [14,] 13 53 0.5755 0.8178 3 [15,] 13 58 0.3608 0.9326 3 [16,] 14 14 0.1302 0.9915 3 [17,] 14 56 -0.1068 0.9943 3 [18,] 14 47 -0.3363 0.9418 2 [19,] 15 56 -0.8560 0.5170 2 [20,] 13 29 0.8002 0.5997 3 [21,] 15 55 -0.9952 -0.0982 2 [22,] 15 39 -0.8409 0.5412 2 [23,] 16 9 -0.9429 0.3330 2 [24,] 16 22 -0.9768 0.2141 2 [25,] 15 38 -0.6262 -0.7797 2 [26,] 14 54 -0.4091 -0.9125 2 [27,] 13 7 0.9284 -0.3716 1 > y <- - m[, 1] - m[, 2]/60 > y [1] -13.16667 -13.13333 -13.20000 -14.25000 -14.70000 -13.01667 -14.51667 [8] -14.95000 -13.08333 -13.03333 -13.20000 -13.18333 -13.56667 -13.88333 [15] -13.96667 -14.23333 -14.93333 -14.78333 -15.93333 -13.48333 -15.91667 [22] -15.65000 -16.15000 -16.36667 -15.63333 -14.90000 -13.11667 > alpha <- m[, 3] > asin <- m[, 4] > grp <- m[, 5] > glm.out <- glm(y ~ alpha + asin) > glm.out Call: glm(formula = y ~ alpha + asin) Coefficients: (Intercept) alpha asin -14.55825 1.505795 -0.07192118 Degrees of Freedom: 27 Total; 24 Residual Residual Deviance: 0.5688748 > summary(glm.out) Call: glm(formula = y ~ alpha + asin) Deviance Residuals: Min 1Q Median 3Q Max -0.3221622 -0.09243684 0.01687288 0.09446685 0.3490464 Coefficients: Value Std. Error t value (Intercept) -14.55824555 0.03539838 -411.268737 alpha 1.50579501 0.04173283 36.081784 asin -0.07192118 0.05083137 -1.414897 (Dispersion Parameter for Gaussian family taken to be 0.0237031 ) Null Deviance: 32.13944 on 26 degrees of freedom Residual Deviance: 0.5688748 on 24 degrees of freedom Number of Fisher Scoring Iterations: 1 Correlation of Coefficients: (Intercept) alpha alpha -0.2780981 asin -0.4993381 0.1116904 > x <- cbind(rep(1, 27), alpha, asin) > x alpha asin [1,] 1 0.8836 -0.4682 [2,] 1 0.9996 -0.0282 [3,] 1 0.9899 0.1421 [4,] 1 0.2221 0.9750 [5,] 1 0.0006 1.0000 [25,] 1 -0.6262 -0.7797 [26,] 1 -0.4091 -0.9125 [27,] 1 0.9284 -0.3716 > beta.ls <- solve(t(x) %*% x, t(x) %*% y) > beta.ls [,1] -14.55824555 alpha 1.50579501 asin -0.07192118 > g <- cbind(grp == 1, grp == 2, grp == 3) * 1 > g [,1] [,2] [,3] [1,] 1 0 0 [2,] 1 0 0 [3,] 1 0 0 [4,] 0 0 1 [5,] 0 0 1 [25,] 0 1 0 [26,] 0 1 0 [27,] 1 0 0 > beta.m <- solve(t(g) %*% x, t(g) %*% y) > beta.m [,1] -14.54719339 alpha 1.49580457 asin -0.09961272 > sum((y - x %*% beta.m)^2) [1] 0.5766745 > summary(glm.out)$cov.unscaled * summary(glm.out)$dispersion (Intercept) alpha asin (Intercept) 0.0012530451 -0.0004108272 -0.0008984831 alpha -0.0004108272 0.0017416292 0.0002369330 asin -0.0008984831 0.0002369330 0.0025838286 > a <- solve(t(g) %*% x, t(g)) > a %*% t(a) * summary(glm.out)$dispersion alpha asin 0.0016040221 -0.0005550267 -0.0018779456 alpha -0.0005550267 0.0020089745 0.0005190049 asin -0.0018779456 0.0005190049 0.0053867812 > plot(asin, alpha, type = "n", main = "Figure 1.2 Mayer's groups") > for(i in 1:3) text(asin[grp == i], alpha[grp == i], i) #------------------------------------------------------------------- # 1.3 MULTIPLE REGRESSION #------------------------------------------------------------------- > m <- matrix(scan("/glim/datasets/peru"), byrow = T, ncol = 10) > m [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [1,] 21 1 71.0 1629 8.0 7.0 12.7 88 170 76 [2,] 22 6 56.5 1569 3.3 5.0 8.0 64 120 60 [3,] 24 5 56.0 1561 3.3 1.3 4.3 68 125 75 [4,] 24 1 61.0 1619 3.7 3.0 4.3 52 148 120 [5,] 25 1 65.0 1566 9.0 12.7 20.7 72 140 78 [6,] 27 19 62.0 1639 3.0 3.3 5.7 72 106 72 [7,] 28 5 53.0 1494 7.3 4.7 8.0 64 120 76 [8,] 28 25 53.0 1568 3.7 4.3 0.0 80 108 62 [9,] 31 6 65.0 1540 10.3 9.0 10.0 76 124 70 [10,] 32 13 57.0 1530 5.7 4.0 6.0 60 134 64 [11,] 33 13 66.5 1622 6.0 5.7 8.3 68 116 76 [12,] 33 10 59.1 1486 6.7 5.3 10.3 72 114 74 [13,] 34 15 64.0 1578 3.3 5.3 7.0 88 130 80 [14,] 35 18 69.5 1645 9.3 5.0 7.0 60 118 68 [15,] 35 2 64.0 1648 3.0 3.7 6.7 60 138 78 [16,] 36 12 56.5 1521 3.3 5.0 11.7 72 134 86 [17,] 36 15 57.0 1547 3.0 3.0 6.0 84 120 70 [18,] 37 16 55.0 1505 4.3 5.0 7.0 64 120 76 [19,] 37 17 57.0 1473 6.0 5.3 11.7 72 114 80 [20,] 38 10 58.0 1538 8.7 6.0 13.0 64 124 64 [21,] 38 18 59.5 1513 5.3 4.0 7.7 80 114 66 [22,] 38 11 61.0 1653 4.0 3.3 4.0 76 136 78 [23,] 38 11 57.0 1566 3.0 3.0 3.0 60 126 72 [24,] 39 21 57.5 1580 4.0 3.0 5.0 64 124 62 [25,] 39 24 74.0 1647 7.3 6.3 15.7 64 128 84 [26,] 39 14 72.0 1620 6.3 7.7 13.3 68 134 92 [27,] 41 25 62.5 1637 6.0 5.3 8.0 76 112 80 [28,] 41 32 68.0 1528 10.0 5.0 11.3 60 128 82 [29,] 41 5 63.4 1647 5.3 4.3 13.7 76 134 92 [30,] 42 12 68.0 1605 11.0 7.0 10.7 88 128 90 [31,] 43 25 69.0 1625 5.0 3.0 6.0 72 140 72 [32,] 43 26 73.0 1615 12.0 4.0 5.7 68 138 74 [33,] 43 10 64.0 1640 5.7 3.0 7.0 60 118 66 [34,] 44 19 65.0 1610 8.0 6.7 7.7 74 110 70 [35,] 44 18 71.0 1572 3.0 4.7 4.3 72 142 84 [36,] 45 10 60.2 1534 3.0 3.0 3.3 56 134 70 [37,] 47 1 55.0 1536 3.0 3.0 4.0 64 116 54 [38,] 50 43 70.0 1630 4.0 6.0 11.7 72 132 90 [39,] 54 40 87.0 1542 11.3 11.7 11.3 92 152 88 > age <- m[, 1] > mig <- m[, 2] > wt <- m[, 3] > ht <- m[, 4] > chin <- m[, 5] > arm <- m[, 6] > calf <- m[, 7] > pulse <- m[, 8] > sys <- m[, 9] > dias <- m[, 10] > plot(mig, sys, main = "Figure 1.3 Peru, effect of mig") > summary(glm(sys ~ mig)) Coefficients: Value Std. Error t value (Intercept) 129.0855179 3.7851478 34.1031642 mig -0.1136264 0.2127156 -0.5341708 (Dispersion Parameter for Gaussian family taken to be 175.1744 ) Residual Deviance: 6481.452 on 37 degrees of freedom > pt(-0.5341708, 37) [1] 0.2982093 > f <- (6531.436 - 6481.452)/175.1744 > c(f, 1 - pf(f, 1, 37)) [1] 0.2853385 0.5964185 > - 0.5341708^2 [1] -0.2853384 > 0.2982093 * 2 [1] 0.5964186 > plot(wt, sys, main = "Figure 1.4 Peru, effect of wt") > summary(glm(sys ~ wt)) Coefficients: Value Std. Error t value (Intercept) 66.5968679 16.4639030 4.045023 wt 0.9628622 0.2590843 3.716405 (Dispersion Parameter for Gaussian family taken to be 128.5421 ) Residual Deviance: 4756.056 on 37 degrees of freedom > summary(glm(sys ~ wt + mig)) Coefficients: Value Std. Error t value (Intercept) 50.319127 15.8183899 3.181052 wt 1.354078 0.2672237 5.067208 mig -0.571845 0.1879405 -3.042691 (Dispersion Parameter for Gaussian family taken to be 105.0877 ) Residual Deviance: 3783.157 on 36 degrees of freedom > (4756.056 - 3783.157)/105.0877 [1] 9.257972 > - 3.042691^2 [1] -9.257969 > pt(-3.042691, 36) [1] 0.002179931 > f <- (6531.436 - 3783.157)/2/105.0877 > c(f, 1 - pf(f, 2, 36)) [1] 1.307612e+001 5.385457e-005 > plot(wt, mig, main = "Figure 1.5 mig vs wt") > rmig <- glm(mig ~ wt)$residuals > rsys <- glm(sys ~ wt)$residuals > plot(rmig, rsys, main = "Figure 1.6 Peru") > summary(glm(rsys ~ rmig - 1)) Coefficients: Value Std. Error t value rmig -0.571845 0.1829279 -3.126068 (Dispersion Parameter for Gaussian family taken to be 99.55677 ) Residual Deviance: 3783.157 on 38 degrees of freedom > 3783.157/36 [1] 105.0877 > plot(mig, sys, type = "n", main = "Figure 1.7 Point label = wt/10") > wf <- floor(wt/10) > wf [1] 7 5 5 6 6 6 5 5 6 5 6 5 6 6 6 5 5 5 5 5 5 6 5 5 7 7 6 6 6 6 6 7 6 6 7 6 [37] 5 7 8 > for(i in 5:8) text(mig[wf == i], sys[wf == i], i) > summary(glm(sys ~ ht + mig)) Coefficients: Value Std. Error t value (Intercept) 40.36194370 63.5716898 0.6349044 ht 0.05639203 0.0403358 1.3980640 mig -0.13499837 0.2105792 -0.6410814 (Dispersion Parameter for Gaussian family taken to be 170.7686 ) Residual Deviance: 6147.671 on 36 degrees of freedom > plot(ht, mig, main = "Figure 1.8 mig vs ht") > summary(glm(sys ~ ht + mig + wt + chin + arm + calf + age)) Coefficients: Value Std. Error t value (Intercept) 129.38413078 56.56907142 2.2871885 ht -0.06730789 0.04254969 -1.5818658 mig -0.56787497 0.22040028 -2.5765619 wt 2.11855825 0.46215933 4.5840430 chin -1.35274359 0.86636913 -1.5613940 arm -0.98327913 1.33833364 -0.7347040 calf 0.23114353 0.61968775 0.3730000 age -0.27770205 0.28560475 -0.9723299 (Dispersion Parameter for Gaussian family taken to be 105.4732 ) Residual Deviance: 3269.668 on 31 degrees of freedom > f <- (3783.157 - 3269.668)/5/105.4732 > c(f, 1 - pf(f, 5, 31)) [1] 0.9736862 0.4491722 #------------------------------------------------------------------- # 2.1 FACTORS AND INTERACTION #------------------------------------------------------------------- > m <- matrix(scan("/glim/datasets/educexp"), ncol = 5, byrow = T) > m [,1] [,2] [,3] [,4] [,5] [1,] 508 235 3944 325 1 [2,] 564 231 4578 323 1 [3,] 322 270 4011 328 1 [4,] 846 261 5233 305 1 [5,] 871 300 4780 303 1 [6,] 774 317 5889 307 1 [7,] 856 387 5663 301 1 [8,] 889 285 5759 310 1 [9,] 715 300 4894 300 1 [10,] 753 221 5012 324 2 [11,] 649 264 4908 329 2 [12,] 830 308 5753 320 2 [13,] 738 379 5439 337 2 [14,] 659 342 4634 328 2 [15,] 664 378 4921 330 2 [16,] 572 232 4869 318 2 [17,] 701 231 4672 309 2 [18,] 443 246 4782 333 2 [19,] 446 230 4296 330 2 [20,] 615 268 4827 318 2 [21,] 661 337 5057 304 2 [22,] 722 344 5540 328 3 [23,] 766 330 5331 323 3 [24,] 631 261 4715 317 3 [25,] 390 214 3828 310 3 [26,] 450 245 4120 321 3 [27,] 476 233 3817 342 3 [28,] 603 250 4243 339 3 [29,] 805 243 4647 287 3 [30,] 523 216 3967 325 3 [31,] 588 212 3946 315 3 [32,] 584 208 3724 332 3 [33,] 445 215 3448 358 3 [34,] 500 221 3680 320 3 [35,] 661 244 3825 355 3 [36,] 680 234 4189 306 3 [37,] 797 269 4336 335 3 [38,] 534 302 4418 335 4 [39,] 541 268 4323 344 4 [40,] 605 323 4813 331 4 [41,] 785 304 5046 324 4 [42,] 698 317 3764 366 4 [43,] 796 332 4504 340 4 [44,] 804 315 4005 378 4 [45,] 809 291 5560 330 4 [46,] 726 312 4989 313 4 [47,] 671 316 4697 305 4 [48,] 909 332 5438 307 4 > urb <- m[, 1] > exp <- m[, 2] > inc <- m[, 3] > n18 <- m[, 4] > reg <- m[, 5] > plot(reg, exp, main = "Figure 2.1 Education") > r1 <- (reg == 1) * 1 > r2 <- (reg == 2) * 1 > r3 <- (reg == 3) * 1 > r4 <- (reg == 4) * 1 > cbind(r, r1, r2, r3, r4) reg r1 r2 r3 r4 [1,] 1 1 0 0 0 [2,] 1 1 0 0 0 [3,] 1 1 0 0 0 [4,] 1 1 0 0 0 [5,] 1 1 0 0 0 [6,] 1 1 0 0 0 [7,] 1 1 0 0 0 [8,] 1 1 0 0 0 [9,] 1 1 0 0 0 [10,] 2 0 1 0 0 [11,] 2 0 1 0 0 [12,] 2 0 1 0 0 [46,] 4 0 0 0 1 [47,] 4 0 0 0 1 [48,] 4 0 0 0 1 > summary(glm(exp ~ 1)) Coefficients: Value Std. Error t value (Intercept) 278.6042 7.096582 39.25892 (Dispersion Parameter for Gaussian family taken to be 2417.351 ) Residual Deviance: 113615.5 on 47 degrees of freedom > summary(glm(exp ~ r1 + r2 + r3 + r4)) Coefficients: (1 not defined because of singularities) Value Std. Error t value (Intercept) 310.18182 13.20771 23.484900 r1 -22.84848 19.68890 -1.160476 r2 -23.84848 18.28525 -1.304247 r3 -63.99432 17.15732 -3.729855 r4 NA NA NA (Dispersion Parameter for Gaussian family taken to be 1918.88 ) Residual Deviance: 84430.74 on 44 degrees of freedom > f <- (113615.5 - 84430.74)/3/1918.88 > c(f, 1 - pf(f, 3, 44)) [1] 5.069755969 0.004210708 > summary(glm(exp ~ r1 + r2 + r3 + r4 - 1)) Coefficients: Value Std. Error t value r1 287.3333 14.60168 19.67811 r2 286.3333 12.64542 22.64324 r3 246.1875 10.95126 22.48030 r4 310.1818 13.20771 23.48490 (Dispersion Parameter for Gaussian family taken to be 1918.88 ) Residual Deviance: 84430.74 on 44 degrees of freedom > for(i in 1:4) print(c(i, mean(exp[reg == i]))) [1] 1.0000 287.3333 [1] 2.0000 286.3333 [1] 3.0000 246.1875 [1] 4.0000 310.1818 > Reg <- factor(reg) > options(contrasts = c("contr.treatment", "contr.poly")) > summary(glm(exp ~ Reg)) Coefficients: Value Std. Error t value (Intercept) 287.33333 14.60168 19.67810642 Reg2 -1.00000 19.31620 -0.05177001 Reg3 -41.14583 18.25209 -2.25430743 Reg4 22.84848 19.68890 1.16047568 (Dispersion Parameter for Gaussian family taken to be 1918.88 ) Residual Deviance: 84430.74 on 44 degrees of freedom > summary(glm(exp ~ r2 + r3 + r4 - 1)) Coefficients: Value Std. Error t value r2 286.3333 39.14538 7.314614 r3 246.1875 33.90089 7.261977 r4 310.1818 40.88601 7.586502 (Dispersion Parameter for Gaussian family taken to be 18388.33 ) Residual Deviance: 827474.7 on 45 degrees of freedom > plot(inc, exp, type = "n", main = "Figure 2.2 No interaction") > for(i in 1:4) text(inc[reg == i], exp[reg == i], i) > summary(glm(exp ~ inc)) Coefficients: Value Std. Error t value (Intercept) 57.22197239 42.010935109 1.362073 inc 0.04768727 0.008967382 5.317859 (Dispersion Parameter for Gaussian family taken to be 1529.565 ) Residual Deviance: 70359.97 on 46 degrees of freedom > glm1 <- glm(exp ~ inc + Reg) > summary(glm1) Coefficients: Value Std. Error t value (Intercept) 69.49173206 50.060696970 1.38814951 inc 0.04381074 0.009764791 4.48660274 Reg2 0.81814570 16.131272080 0.05071799 Reg3 -7.73649343 16.959973309 -0.45616189 Reg4 35.34914924 16.671793785 2.12029669 (Dispersion Parameter for Gaussian family taken to be 1337.419 ) Residual Deviance: 57509.02 on 43 degrees of freedom > f <- (70359.97 - 57509.02)/3/1337.419 > c(f, 1 - pf(f, 3, 43)) [1] 3.20292294 0.03250252 > for(i in 1:4) lines(inc[reg == i], glm1$fitted.values[reg == i]) > ir1 <- inc * r1 > ir2 <- inc * r2 > ir3 <- inc * r3 > ir4 <- inc * r4 > cbind(reg, inc, ir1, ir2, ir3, ir4) reg inc ir1 ir2 ir3 ir4 [1,] 1 3944 3944 0 0 0 [2,] 1 4578 4578 0 0 0 [3,] 1 4011 4011 0 0 0 [4,] 1 5233 5233 0 0 0 [5,] 1 4780 4780 0 0 0 [6,] 1 5889 5889 0 0 0 [7,] 1 5663 5663 0 0 0 [8,] 1 5759 5759 0 0 0 [9,] 1 4894 4894 0 0 0 [10,] 2 5012 0 5012 0 0 [11,] 2 4908 0 4908 0 0 [12,] 2 5753 0 5753 0 0 [45,] 4 5560 0 0 0 5560 [46,] 4 4989 0 0 0 4989 [47,] 4 4697 0 0 0 4697 [48,] 4 5438 0 0 0 5438 > glm2 <- glm(exp ~ inc + Reg + ir1 + ir2 + ir3 + ir4) > summary(glm2) Coefficients: (1 not defined because of singularities) Value Std. Error t value (Intercept) 77.481242319 85.73285946 0.90375199 inc 0.001163078 0.01979001 0.05877096 Reg2 -141.946115061 163.02488170 -0.87070215 Reg3 -94.885015199 107.75007952 -0.88060274 Reg4 227.249229791 126.74681964 1.79293832 ir1 0.041040869 0.02614275 1.56987602 ir2 0.069980719 0.03432589 2.03871531 ir3 0.061451394 0.02505425 2.45273369 ir4 NA NA NA (Dispersion Parameter for Gaussian family taken to be 1220.987 ) Residual Deviance: 48839.48 on 40 degrees of freedom > f <- (57509.02 - 48839.48)/3/1220.987 > c(f, 1 - pf(f, 3, 40)) [1] 2.36681199 0.08520352 > plot(inc, exp, type = "n", main = "Figure 2.3 With interaction") > for(i in 1:4) { text(inc[reg == i], exp[reg == i], i) lines(inc[reg == i], glm2$fitted.values[reg == i]) } > summary(glm(exp ~ inc + Reg + inc:Reg)) Coefficients: Value Std. Error t value (Intercept) 77.48124232 85.73285946 0.9037520 inc 0.04220395 0.01708211 2.4706513 Reg2 -141.94611506 163.02488170 -0.8707022 Reg3 -94.88501520 107.75007952 -0.8806027 Reg4 227.24922979 126.74681964 1.7929383 incReg2 0.02893985 0.03283932 0.8812561 incReg3 0.02041053 0.02297541 0.8883640 incReg4 -0.04104087 0.02614275 -1.5698760 (Dispersion Parameter for Gaussian family taken to be 1220.987 ) Residual Deviance: 48839.48 on 40 degrees of freedom > summary(glm(exp ~ Reg + inc:Reg - 1)) Coefficients: Value Std. Error t value Reg1 77.481242319 85.73285946 0.90375199 Reg2 -64.464872742 138.66141808 -0.46490851 Reg3 -17.403772880 65.26834184 -0.26664953 Reg4 304.730472110 93.35219921 3.26430951 Reg1inc 0.042203947 0.01708211 2.47065126 Reg2inc 0.071143797 0.02804679 2.53661127 Reg3inc 0.062614472 0.01536459 4.07524410 Reg4inc 0.001163078 0.01979001 0.05877096 (Dispersion Parameter for Gaussian family taken to be 1220.987 ) Residual Deviance: 48839.48 on 40 degrees of freedom > summary(glm(exp ~ inc, weight = r1)) Warning messages: Warning in summary.glm(glm(exp ~ inc, weight = r1)): 39 rows with zero weights not counted Coefficients: Value Std. Error t value (Intercept) 77.48124232 95.44457988 0.811793 inc 0.04220395 0.01901716 2.219256 (Dispersion Parameter for Gaussian family taken to be 1513.279 ) Residual Deviance: 10592.95 on 7 degrees of freedom > summary(glm(exp ~ inc + urb + n18)) Coefficients: Value Std. Error t value (Intercept) -290.33668153 135.58423780 -2.141375 inc 0.04895980 0.01230704 3.978193 urb 0.06896144 0.04989909 1.382018 n18 0.91352747 0.33745160 2.707136 (Dispersion Parameter for Gaussian family taken to be 1302.831 ) Residual Deviance: 57324.58 on 44 degrees of freedom > summary(glm(exp ~ inc + urb + n18 + Reg)) Coefficients: Value Std. Error t value (Intercept) -183.45163885 149.47226712 -1.2273289 inc 0.04517646 0.01429142 3.1610905 urb 0.04841085 0.05295610 0.9141695 n18 0.68101569 0.36872950 1.8469249 Reg2 -4.35640569 16.53297870 -0.2634979 Reg3 -11.53595442 16.60137148 -0.6948796 Reg4 19.82306934 17.80601741 1.1132792 (Dispersion Parameter for Gaussian family taken to be 1264.852 ) Residual Deviance: 51858.94 on 41 degrees of freedom > f <- (57324.58 - 51858.94)/3/1264.852 > c(f, 1 - pf(f, 3, 41)) [1] 1.4403899 0.2449547 > summary(glm(exp ~ inc + urb + n18 + Reg + (inc + urb + n18):Reg)) Coefficients: Value Std. Error t value (Intercept) 1.503278e+003 792.69107055 1.896424099 inc 4.046002e-002 0.02955810 1.368830201 urb -1.930183e-001 0.15273764 -1.263724361 n18 -4.114715e+000 2.27281142 -1.810407542 Reg2 -2.038133e+003 875.86105576 -2.327005358 Reg3 -1.799109e+003 818.28033513 -2.198645888 Reg4 -9.441369e+002 886.50883277 -1.065005674 incReg2 1.012932e-004 0.05209864 0.001944258 incReg3 3.101351e-002 0.03656007 0.848289274 incReg4 -7.031679e-002 0.04762851 -1.476359242 urbReg2 3.305324e-001 0.21007751 1.573383044 urbReg3 1.849460e-001 0.17938882 1.030978341 urbReg4 3.202425e-001 0.18872859 1.696841690 n18Reg2 5.761912e+000 2.53109015 2.276454686 n18Reg3 4.869708e+000 2.33223334 2.088001851 n18Reg4 3.515340e+000 2.41742097 1.454169635 (Dispersion Parameter for Gaussian family taken to be 1185.193 ) Residual Deviance: 37926.18 on 32 degrees of freedom > f <- (51858.94 - 37926.18)/9/1185.193 > c(f, 1 - pf(f, 9, 32)) [1] 1.3061876 0.2722586 #------------------------------------------------------------------- # 2.2 ORTHOGONALITY AND BALANCED DESIGNS #------------------------------------------------------------------- > m <- matrix(scan("/glim/datasets/medcare"), ncol = 3, byrow = T) > m [,1] [,2] [,3] [1,] 2 1 1 [2,] 5 1 1 [3,] 8 1 1 [4,] 6 1 1 [5,] 2 1 1 [6,] 4 1 1 [7,] 3 1 1 [8,] 10 1 1 [9,] 7 1 2 [10,] 5 1 2 [11,] 8 1 2 [12,] 6 1 2 [13,] 3 1 2 [14,] 5 1 2 [15,] 6 1 2 [16,] 4 1 2 [17,] 5 1 2 [18,] 6 1 2 [19,] 8 1 2 [20,] 9 1 2 [21,] 4 2 1 [22,] 6 2 1 [23,] 3 2 1 [24,] 3 2 1 [25,] 7 2 2 [26,] 7 2 2 [27,] 8 2 2 [28,] 6 2 2 [29,] 4 2 2 [30,] 9 2 2 [31,] 8 2 2 [32,] 7 2 2 [33,] 8 3 1 [34,] 7 3 1 [35,] 5 3 1 [36,] 9 3 1 [37,] 9 3 1 [38,] 10 3 1 [39,] 8 3 1 [40,] 6 3 1 [41,] 8 3 1 [42,] 10 3 1 [43,] 5 3 2 [44,] 8 3 2 [45,] 6 3 2 [46,] 6 3 2 [47,] 9 3 2 [48,] 7 3 2 [49,] 7 3 2 [50,] 8 3 2 > sat <- m[, 1] > com <- factor(m[, 2]) > worry <- factor(m[, 3]) > glm0 <- glm(sat ~ 1) > summary(glm0) Coefficients: Value Std. Error t value (Intercept) 6.4 0.3010187 21.26114 (Dispersion Parameter for Gaussian family taken to be 4.530612 ) Residual Deviance: 222 on 49 degrees of freedom > glmc <- glm(sat ~ com) > summary(glmc) Coefficients: Value Std. Error t value (Intercept) 5.600000 0.4415211 12.6834250 com2 0.400000 0.7210010 0.5547843 com3 1.955556 0.6415153 3.0483381 (Dispersion Parameter for Gaussian family taken to be 3.898818 ) Residual Deviance: 183.2444 on 47 degrees of freedom > glm0$deviance - glmc$deviance [1] 38.75556 > glmw <- glm(sat ~ worry) > summary(glmw) Coefficients: Value Std. Error t value (Intercept) 6.1818182 0.4565702 13.5396893 worry 0.3896104 0.6101175 0.6385825 (Dispersion Parameter for Gaussian family taken to be 4.586039 ) Residual Deviance: 220.1299 on 48 degrees of freedom > glmcw <- glm(sat ~ com + worry) > summary(glmcw) Coefficients: Value Std. Error t value (Intercept) 5.1791045 0.5561975 9.3116288 com2 0.3532338 0.7180479 0.4919363 com3 2.0646766 0.6441058 3.2054929 worry 0.7014925 0.5689853 1.2328834 (Dispersion Parameter for Gaussian family taken to be 3.856154 ) Residual Deviance: 177.3831 on 46 degrees of freedom > glmw$deviance - glmcw$deviance [1] 42.74679 > summary(glm(sat ~ com + worry + com:worry)) Coefficients: Value Std. Error t value (Intercept) 5 0.6527912 7.6594169 com2 -1 1.1306675 -0.8844333 com3 3 0.8758113 3.4253954 worry 1 0.8427498 1.1865918 com2worry 2 1.4101902 1.4182484 com3worry -2 1.2154311 -1.6455067 (Dispersion Parameter for Gaussian family taken to be 3.409091 ) Residual Deviance: 150 on 44 degrees of freedom > summary(glm(sat ~ com:worry - 1)) Coefficients: Value Std. Error t value com1worry1 5 0.6527912 7.659417 com2worry1 4 0.9231862 4.332820 com3worry1 8 0.5838742 13.701581 com1worry2 6 0.5330018 11.256998 com2worry2 7 0.6527912 10.723184 com3worry2 7 0.6527912 10.723184 (Dispersion Parameter for Gaussian family taken to be 3.409091 ) Residual Deviance: 150 on 44 degrees of freedom > m <- matrix(scan("/glim/datasets/medcareb"), ncol = 3, byrow = T) > m [,1] [,2] [,3] [1,] 2 1 1 [2,] 5 1 1 [3,] 8 1 1 [4,] 6 1 1 [5,] 7 1 2 [6,] 5 1 2 [7,] 8 1 2 [8,] 6 1 2 [9,] 4 2 1 [10,] 6 2 1 [11,] 3 2 1 [12,] 3 2 1 [13,] 7 2 2 [14,] 7 2 2 [15,] 8 2 2 [16,] 6 2 2 [17,] 8 3 1 [18,] 7 3 1 [19,] 5 3 1 [20,] 9 3 1 [21,] 5 3 2 [22,] 8 3 2 [23,] 6 3 2 [24,] 6 3 2 > sat <- m[, 1] > com <- factor(m[, 2]) > worry <- factor(m[, 3]) > glm0 <- glm(sat ~ 1) > summary(glm0) Coefficients: Value Std. Error t value (Intercept) 6.041667 0.3685025 16.39519 (Dispersion Parameter for Gaussian family taken to be 3.259058 ) Residual Deviance: 74.95833 on 23 degrees of freedom > glmc <- glm(sat ~ com) > summary(glmc) Coefficients: Value Std. Error t value (Intercept) 5.875 0.6379609 9.2090290 com2 -0.375 0.9022129 -0.4156447 com3 0.875 0.9022129 0.9698376 (Dispersion Parameter for Gaussian family taken to be 3.255952 ) Residual Deviance: 68.375 on 21 degrees of freedom > glm0$deviance - glmc$deviance [1] 6.583333 > glmw <- glm(sat ~ worry) > summary(glmw) Coefficients: Value Std. Error t value (Intercept) 5.500000 0.5072081 10.843674 worry 1.083333 0.7173006 1.510292 (Dispersion Parameter for Gaussian family taken to be 3.087121 ) Residual Deviance: 67.91667 on 22 degrees of freedom > glmcw <- glm(sat ~ com + worry) > summary(glmcw) Coefficients: Value Std. Error t value (Intercept) 5.333333 0.7149204 7.4600385 com2 -0.375000 0.8755950 -0.4282802 com3 0.875000 0.8755950 0.9993204 worry 1.083333 0.7149204 1.5153203 (Dispersion Parameter for Gaussian family taken to be 3.066667 ) Residual Deviance: 61.33333 on 20 degrees of freedom > glmw$deviance - glmcw$deviance [1] 6.583333 > m <- matrix(scan("/glim/datasets/glue"), ncol = 3, byrow = T) > m [,1] [,2] [,3] [1,] 190 80 40 [2,] 189 80 40 [3,] 192 90 40 [4,] 190 90 40 [5,] 196 80 60 [6,] 193 80 60 [7,] 195 90 60 [8,] 196 90 60 [9,] 201 80 80 [10,] 200 80 80 [11,] 203 90 80 [12,] 205 90 80 > stren <- m[, 1] > temp <- m[, 2] > humid <- m[, 3] > glm0 <- glm(stren ~ 1) > summary(glm0) Coefficients: Value Std. Error t value (Intercept) 195.8333 1.551311 126.2373 (Dispersion Parameter for Gaussian family taken to be 28.87879 ) Residual Deviance: 317.6667 on 11 degrees of freedom > glmt <- glm(stren ~ temp) > summary(glmt) Coefficients: Value Std. Error t value (Intercept) 178.8333 27.1789338 6.5798509 temp 0.2000 0.3192004 0.6265657 (Dispersion Parameter for Gaussian family taken to be 30.56667 ) Residual Deviance: 305.6667 on 10 degrees of freedom > glm0$deviance - glmt$deviance [1] 12 > glmh <- glm(stren ~ humid) > summary(glmh) Coefficients: Value Std. Error t value (Intercept) 177.8333 1.89333627 93.92591 humid 0.3000 0.03044804 9.85285 (Dispersion Parameter for Gaussian family taken to be 2.966667 ) Residual Deviance: 29.66667 on 10 degrees of freedom > glmht <- glm(stren ~ humid + temp) > summary(glmht) Coefficients: Value Std. Error t value (Intercept) 160.8333 7.04603470 22.82608 humid 0.3000 0.02476744 12.11268 temp 0.2000 0.08089011 2.47249 (Dispersion Parameter for Gaussian family taken to be 1.962963 ) Residual Deviance: 17.66667 on 9 degrees of freedom > glmh$deviance - glmht$deviance [1] 12 > xstren <- c(190, 189, 190, 188, 196, 192, 195, 197, 203, 203, 203, 204) > xtemp <- c(rep(80, 6), rep(90, 6)) > cbind(xstren, xtemp, humid) xstren xtemp humid [1,] 190 80 40 [2,] 189 80 40 [3,] 190 80 40 [4,] 188 80 40 [5,] 196 80 60 [6,] 192 80 60 [7,] 195 90 60 [8,] 197 90 60 [9,] 203 90 80 [10,] 203 90 80 [11,] 203 90 80 [12,] 204 90 80 > glm0 <- glm(xstren ~ 1) > summary(glm0) Coefficients: Value Std. Error t value (Intercept) 195.8333 1.770265 110.6237 (Dispersion Parameter for Gaussian family taken to be 37.60606 ) Residual Deviance: 413.6667 on 11 degrees of freedom > glmt <- glm(xstren ~ xtemp) > summary(glmt) Coefficients: Value Std. Error t value (Intercept) 110.8333 16.5739085 6.687218 xtemp 1.0000 0.1946507 5.137408 (Dispersion Parameter for Gaussian family taken to be 11.36667 ) Residual Deviance: 113.6667 on 10 degrees of freedom > glm0$deviance - glmt$deviance [1] 300 > glmh <- glm(xstren ~ humid) > summary(glmh) Coefficients: Value Std. Error t value (Intercept) 174.8333 1.61804065 108.05250 humid 0.3500 0.02602082 13.45076 (Dispersion Parameter for Gaussian family taken to be 2.166667 ) Residual Deviance: 21.66667 on 10 degrees of freedom > glmht <- glm(xstren ~ humid + xtemp) > summary(glmht) Coefficients: Value Std. Error t value (Intercept) 160.8333 9.92759280 16.200638 humid 0.3000 0.04289846 6.993258 xtemp 0.2000 0.14010578 1.427493 (Dispersion Parameter for Gaussian family taken to be 1.962963 ) Residual Deviance: 17.66667 on 9 degrees of freedom > glmh$deviance - glmht$deviance [1] 4