#------------------------------------------------------------------- # 7. LOGISTIC REGRESSION #------------------------------------------------------------------- #------------------------------------------------------------------- # KYPHOSIS #------------------------------------------------------------------- > y <- (kyphosis$Kyphosis == "present") * 1 > age <- kyphosis$Age > cbind(y, age) y age [1,] 0 71 [2,] 0 158 [3,] 1 128 [4,] 0 2 [5,] 0 1 [6,] 0 1 [7,] 0 61 [8,] 0 37 [9,] 0 113 [10,] 1 59 [11,] 1 82 [12,] 0 148 [76,] 0 178 [77,] 1 157 [78,] 0 26 [79,] 0 120 [80,] 1 42 [81,] 0 36 > par(mfrow = c(2, 2)) > plot(age, y, yaxs = "s", main = "Figure 7.1 Kyphosis") > summary(glm(y ~ 1, family = binomial)) Coefficients: Value Std. Error t value (Intercept) -1.32567 0.2728526 -4.858557 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 83.23447 on 80 degrees of freedom Number of Fisher Scoring Iterations: 0 > glm1 <- glm(y ~ age, family = binomial) > summary(glm1) Coefficients: Value Std. Error t value (Intercept) -1.809076886 0.525924169 -3.439806 age 0.005439848 0.004797095 1.133988 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 81.93249 on 79 degrees of freedom Number of Fisher Scoring Iterations: 3 > i <- order(age) > i [1] 5 6 14 16 37 4 54 57 29 27 26 52 75 25 70 66 13 69 39 21 78 20 [23] 31 42 81 8 80 59 38 10 7 44 51 1 56 41 17 19 63 11 73 40 36 23 [45] 45 28 60 22 34 9 62 64 65 58 79 49 32 72 3 33 61 24 48 47 46 53 [67] 35 55 43 12 30 77 2 71 68 15 18 50 76 67 74 > lines(age[i], fitted(glm1)[i]) > a <- seq(-500, 1000, 100) > eta <- -1.809076886 + 0.005439848 * a > phat <- exp(eta)/(1 + exp(eta)) > plot(a, phat, yaxs = "s", type = "l", main = "Figure 7.2 Kyphosis enlarged") > points(age, y) > plot(age, y, yaxs = "s", main = "Figure 7.3 Quadratic in age") > age2 <- age^2 > glm2 <- glm(y ~ age + age2, family = binomial) > summary(glm2) Coefficients: Value Std. Error t value (Intercept) -3.7702868712 1.1501219836 -3.278163 age 0.0700349930 0.0269651682 2.597239 age2 -0.0003651679 0.0001477186 -2.472051 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 72.73858 on 78 degrees of freedom Number of Fisher Scoring Iterations: 5 > lines(age[i], fitted(glm2)[i]) #------------------------------------------------------------------- # 245-T #------------------------------------------------------------------- > misc <- c(10, 17, 18, 11, 16, 24, 20, 17, 9, 15, 16, 15) > totf <- c(222, 221, 188, 183, 197, 218, 208, 219, 198, 216, 244, 218) > herb <- c(0, 0, 0, 0, 1454, 3280, 788, 0, 304, 560, 0, 0) > p <- misc/totf > cbind(month.name, herb, misc, totf, p) month.name herb misc totf p [1,] "January" "0" "10" "222" "0.045045045045045" [2,] "February" "0" "17" "221" "0.0769230769230769" [3,] "March" "0" "18" "188" "0.0957446808510638" [4,] "April" "0" "11" "183" "0.0601092896174863" [5,] "May" "1454" "16" "197" "0.0812182741116751" [6,] "June" "3280" "24" "218" "0.110091743119266" [7,] "July" "788" "20" "208" "0.0961538461538462" [8,] "August" "0" "17" "219" "0.0776255707762557" [9,] "September" "304" "9" "198" "0.0454545454545455" [10,] "October" "560" "15" "216" "0.0694444444444444" [11,] "November" "0" "16" "244" "0.0655737704918033" [12,] "December" "0" "15" "218" "0.0688073394495413" > plot(herb, p, main = "Figure 7.4 245-T") > summary(glm(p ~ 1, family = binomial, weight = totf)) Coefficients: Value Std. Error t value (Intercept) -2.523172 0.07389905 -34.1435 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 13.21583 on 11 degrees of freedom Number of Fisher Scoring Iterations: 0 > glm <- glm(p ~ herb, family = binomial, weight = totf) > summary(glm) Coefficients: Value Std. Error t value (Intercept) -2.6206479509 0.08942583878 -29.305266 herb 0.0001629353 0.00007074149 2.303249 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 8.309981 on 10 degrees of freedom Number of Fisher Scoring Iterations: 5 > cbind(p, fitted(glm), resid(glm)) p 1 0.04504505 0.06782132 -1.43211128 2 0.07692308 0.06782132 0.52756503 3 0.09574468 0.06782132 1.43971078 4 0.06010929 0.06782132 -0.42258538 5 0.08121827 0.08442103 -0.16263392 6 0.11009174 0.11044338 -0.01657167 7 0.09615385 0.07640292 1.03404175 8 0.07762557 0.06782132 0.56488094 9 0.04545455 0.07102066 -1.49318096 10 0.06944444 0.07382237 -0.24836446 11 0.06557377 0.06782132 -0.14035288 12 0.06880734 0.06782132 0.05777118 > i <- order(herb) > lines(herb[i], fitted(glm)[i]) > h <- seq(-5000, 35000, 1000) > eta <- -2.6206479509 + 0.0001629353 * h > phat <- exp(eta)/(1 + exp(eta)) > plot(h, phat, yaxs = "s", type = "l", main = "Figure 7.5 245-T enlarged") > points(herb, p) #------------------------------------------------------------------- # CLINICS (AGAIN) #------------------------------------------------------------------- > died <- c(3, 4, 17, 2) > surv <- c(176, 293, 197, 23) > totc <- died + surv > p <- died/totc > clin <- factor(c(1, 1, 2, 2)) > care <- factor(c(1, 2, 1, 2)) > cbind(p, clin, care) p clin care [1,] 0.01675978 1 1 [2,] 0.01346801 1 2 [3,] 0.07943925 2 1 [4,] 0.08000000 2 2 > options(contrasts = c("contr.treatment", "contr.poly")) > summary(glm(p ~ 1, family = binomial, weight = totc)) Coefficients: Value Std. Error t value (Intercept) -3.277145 0.1996426 -16.41506 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 17.8284 on 3 degrees of freedom Number of Fisher Scoring Iterations: 0 > summary(glm(p ~ care, family = binomial, weight = totc)) Coefficients: Value Std. Error t value (Intercept) -2.925846 0.2295233 -12.74749 care -1.038135 0.4713954 -2.20226 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 12.21642 on 2 degrees of freedom Number of Fisher Scoring Iterations: 6 > summary(glm(p ~ clin, family = binomial, weight = totc)) Coefficients: Value Std. Error t value (Intercept) -4.204693 0.3807720 -11.042546 clin 1.755504 0.4496269 3.904357 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 0.0822892 on 2 degrees of freedom Number of Fisher Scoring Iterations: 7 > summary(glm(p ~ care + clin, family = binomial, weight = totc)) Coefficients: Value Std. Error t value (Intercept) -4.1372220 0.5076773 -8.1493153 care -0.1103762 0.5610132 -0.1967444 clin 1.6991086 0.5306596 3.2018805 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 0.0432559 on 1 degrees of freedom Number of Fisher Scoring Iterations: 7 > glm <- glm(p ~ clin, family = binomial, weight = totc) > cbind(p * totc, fitted(glm) * totc, resid(glm)) [,1] [,2] [,3] 1 3 2.632353 0.223333656 2 4 4.367647 -0.179764389 3 17 17.012552 -0.003172306 4 2 1.987448 0.009271422 #------------------------------------------------------------------- # 245-T (AGAIN) #------------------------------------------------------------------- > freq <- c(misc, totf - misc) > herbb <- c(herb, herb) > month <- factor(rep(1:12, 2)) > surv <- factor(rep(1:2, each = 12)) > cbind(month, surv, freq, herbb) month surv freq herbb [1,] 1 1 10 0 [2,] 2 1 17 0 [3,] 3 1 18 0 [4,] 4 1 11 0 [5,] 5 1 16 1454 [6,] 6 1 24 3280 [7,] 7 1 20 788 [8,] 8 1 17 0 [9,] 9 1 9 304 [10,] 10 1 15 560 [11,] 11 1 16 0 [12,] 12 1 15 0 [13,] 1 2 212 0 [14,] 2 2 204 0 [15,] 3 2 170 0 [16,] 4 2 172 0 [17,] 5 2 181 1454 [18,] 6 2 194 3280 [19,] 7 2 188 788 [20,] 8 2 202 0 [21,] 9 2 189 304 [22,] 10 2 201 560 [23,] 11 2 228 0 [24,] 12 2 203 0 > summary(glm(freq ~ month + surv + surv:herbb, family = poisson)) Coefficients: (1 not defined because of singularities) Value Std. Error t value (Intercept) 2.7117987053 0.10701492744 25.34037793 month2 -0.0045146807 0.09502173442 -0.04751209 month3 -0.1662354191 0.09911263247 -1.67723745 month4 -0.1931912294 0.09984338873 -1.93494263 month5 -0.1374415555 0.09820608251 -1.39952182 month6 -0.0649836753 0.09828201044 -0.66119603 month7 -0.0743879081 0.09657761701 -0.77023963 month8 -0.0136056525 0.09523892114 -0.14285811 month9 -0.1178483716 0.09775790903 -1.20551240 month10 -0.0338574537 0.09560901125 -0.35412409 month11 0.0944908430 0.09275027518 1.01876617 month12 -0.0181823195 0.09534882330 -0.19069265 surv 2.6206479149 0.08942026328 29.30709236 surv1herbb 0.0001629353 0.00007074071 2.30327474 surv2herbb NA NA NA (Dispersion Parameter for Poisson family taken to be 1 ) Residual Deviance: 8.309981 on 10 degrees of freedom Number of Fisher Scoring Iterations: 3 #------------------------------------------------------------------- # BIRDS #------------------------------------------------------------------- > cal <- c(36.9, 41.1, 35.8, 40.6, 34.6, 38.9, 34.3, 37.9, 32.8, 37, 31.7, 36.1, 31, 36.3, 29.8, 34.2, 29.1, 33.4, 28.2, 32.8, 27.4, 32, 27.8, 31.9, 25.5, 30.7, 24.9, 29.5, 23.7, 28.5, 23.1, 27.7) > spec <- factor(rep(1:2, 16)) > temp <- rep((0:15) * 2, each = 2) > plot(temp, cal, type = "n", main = "Figure 7.6 Birds") > points(temp[spec == 1], cal[spec == 1], pch = "A") > points(temp[spec == 2], cal[spec == 2], pch = "B") > glm <- glm(cal ~ spec - 1 + temp:spec) > summary(glm) Coefficients: Value Std. Error t value spec1 36.5794118 0.19739011 185.31532 spec2 40.8389706 0.19739011 206.89471 spec1temp -0.4527941 0.01121101 -40.38836 spec2temp -0.4367647 0.01121101 -38.95857 (Dispersion Parameter for Gaussian family taken to be 0.1709338 ) Residual Deviance: 4.786147 on 28 degrees of freedom Number of Fisher Scoring Iterations: 1 > abline(coef(glm)[c(1, 3)]) > abline(coef(glm)[c(2, 4)]) > glm <- glm(cal ~ temp * spec) > summary(glm) Coefficients: Value Std. Error t value (Intercept) 36.57941176 0.19739011 185.315319 temp -0.45279412 0.01121101 -40.388361 spec 4.25955882 0.27915178 15.258935 temp:spec 0.01602941 0.01585476 1.011016 (Dispersion Parameter for Gaussian family taken to be 0.1709338 ) Residual Deviance: 4.786147 on 28 degrees of freedom Number of Fisher Scoring Iterations: 1 > plot(temp, resid(glm), type = "n", main = "Figure 7.7 Birds residuals") > points(temp[spec == 1], resid(glm)[spec == 1], pch = "A") > points(temp[spec == 2], resid(glm)[spec == 2], pch = "B") > Temp <- factor(temp) > glm <- glm(cal ~ Temp + temp * spec) > summary(glm) Coefficients: (1 not defined because of singularities) Value Std. Error t value (Intercept) 36.87022059 0.22214674 165.972365 Temp2 -0.81602941 0.28371694 -2.876210 Temp4 -2.28205882 0.28434118 -8.025777 Temp6 -2.94808824 0.28537855 -10.330448 Temp8 -4.16411765 0.28682456 -14.517996 Temp10 -5.18014706 0.28867307 -17.944684 Temp12 -5.44617647 0.29091641 -18.720760 Temp14 -7.11220588 0.29354553 -24.228629 Temp16 -7.87823529 0.29655017 -26.566281 Temp18 -8.64426471 0.29991904 -28.821994 Temp20 -9.46029412 0.30364002 -31.156282 Temp22 -9.32632353 0.30770034 -30.309761 Temp24 -11.09235294 0.31208674 -35.542532 Temp26 -12.00838235 0.31678569 -37.906959 Temp28 -13.12441176 0.32178350 -40.786466 Temp30 -13.84044118 0.32706645 -42.316908 temp NA NA NA spec 4.25955882 0.19142231 22.252154 temp:spec 0.01602941 0.01087206 1.474368 (Dispersion Parameter for Gaussian family taken to be 0.0803771 ) Residual Deviance: 1.125279 on 14 degrees of freedom Number of Fisher Scoring Iterations: 1 > diff <- cal[spec == 2] - cal[spec == 1] > tem <- (0:15) * 2 > plot(tem, diff, main = "Figure 7.8 Birds differences") > summary(glm(diff ~ tem)) Coefficients: Value Std. Error t value (Intercept) 4.25955882 0.19142231 22.252154 tem 0.01602941 0.01087206 1.474368 (Dispersion Parameter for Gaussian family taken to be 0.1607542 ) Residual Deviance: 2.250559 on 14 degrees of freedom Number of Fisher Scoring Iterations: 1 #------------------------------------------------------------------- # ESKIMO #------------------------------------------------------------------- > m <- matrix(scan("/glim/datasets/eskimo"), ncol = 2, byrow = T) > pres <- m[, 1] > abs <- m[, 2] > tot <- pres + abs > p <- pres/tot > age <- rep(1:6, 6) > plot(age, p, yaxs = "s", main = "Figure 7.9 Eskimo") > glm <- glm(p ~ age, family = binomial, weight = tot) > summary(glm) Coefficients: Value Std. Error t value (Intercept) -2.715928 0.24201025 -11.22237 age 0.777335 0.07517125 10.34086 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 59.18718 on 34 degrees of freedom Number of Fisher Scoring Iterations: 4 > lines(age[1:6], fitted(glm)[1:6]) > a2 <- age^2 > glm <- glm(p ~ age + a2, family = binomial, weight = tot) > summary(glm) Coefficients: Value Std. Error t value (Intercept) -3.49915048 0.4896266 -7.146569 age 1.36435203 0.3171085 4.302477 a2 -0.08971686 0.0461107 -1.945684 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 55.4498 on 33 degrees of freedom Number of Fisher Scoring Iterations: 4 Correlation of Coefficients: (Intercept) age age -0.9465682 a2 0.8633263 -0.9724642 > Age <- factor(age) > glm <- glm(p ~ age + a2 + Age, family = binomial, weight = tot) > summary(glm) Call: glm(formula = p ~ age + a2 + Age, family = binomial, weights = tot) Deviance Residuals: Min 1Q Median 3Q Max -2.246959 -0.8191269 -0.02167127 0.8726518 2.683292 Coefficients: (2 not defined because of singularities) Value Std. Error t value (Intercept) -3.02917623 0.7538406 -4.0183244 age 0.88427348 0.7623635 1.1599106 a2 -0.02223773 0.1149731 -0.1934168 Age2 0.19690066 0.4858611 0.4052612 Age3 0.25225565 0.6242481 0.4040952 Age4 0.66886647 0.6079110 1.1002704 Age5 NA NA NA Age6 NA NA NA (Dispersion Parameter for Binomial family taken to be 1 ) Null Deviance: 203.5681 on 35 degrees of freedom Residual Deviance: 53.12781 on 30 degrees of freedom Number of Fisher Scoring Iterations: 4 > sex <- factor(rep(1:2, each = 6, length.out = 36)) > pop <- factor(rep(1:3, each = 12, length.out = 36)) > glm(p ~ age + sex, family = binomial, weight = tot) Coefficients: (Intercept) age sex -2.642517 0.7773744 -0.1588495 Degrees of Freedom: 36 Total; 33 Residual Residual Deviance: 58.58853 > glm(p ~ age + pop, family = binomial, weight = tot) Coefficients: (Intercept) age pop2 pop3 -2.652033 0.8012799 -0.070828 -0.6030449 Degrees of Freedom: 36 Total; 32 Residual Residual Deviance: 54.17556 > glm(p ~ age + sex + pop, family = binomial, weight = tot) Coefficients: (Intercept) age sex pop2 pop3 -2.578529 0.8011843 -0.157841 -0.07261133 -0.603388 Degrees of Freedom: 36 Total; 31 Residual Residual Deviance: 53.59069 > glm(p ~ age + pop + age:pop, family = binomial, weight = tot) Coefficients: (Intercept) age pop2 pop3 agepop2 agepop3 -3.226308 1.006941 0.210008 1.470775 -0.0997709 -0.6480929 Degrees of Freedom: 36 Total; 30 Residual Residual Deviance: 40.46303 > glm(p ~ age + sex + pop + age:pop, family = binomial, weight = tot) Coefficients: (Intercept) age sex pop2 pop3 agepop2 -3.146665 1.010145 -0.1908951 0.2225335 1.493391 -0.1049971 agepop3 -0.6556682 Degrees of Freedom: 36 Total; 29 Residual Residual Deviance: 39.63187 > glm(p ~ age + sex + pop + age:sex + age:pop, family = binomial, weight = tot) Coefficients: (Intercept) age sex pop2 pop3 age:sex -3.155941 1.013425 -0.1703874 0.223182 1.493404 -0.00704755 agepop2 agepop3 -0.1053317 -0.6558677 Degrees of Freedom: 36 Total; 28 Residual Residual Deviance: 39.62984 > glm(p ~ age + sex + pop + age:sex + age:pop + sex:pop, family = binomial, weight = tot) Coefficients: (Intercept) age sex pop2 pop3 age:sex -3.232137 0.9972477 0.01332695 0.4155557 1.802575 0.02030069 agepop2 agepop3 sexpop2 sexpop3 -0.09918795 -0.6559149 -0.4600432 -0.6808865 Degrees of Freedom: 36 Total; 26 Residual Residual Deviance: 37.65135 > glm(p ~ age + sex + pop + age:sex + age:pop + sex:pop + age:sex:pop, family = binomial, weight = tot) Coefficients: (Intercept) age sex pop2 pop3 age:sex agepop2 -3.797524 1.200114 1.086031 1.503164 2.649291 -0.3641017 -0.4931793 agepop3 sexpop2 sexpop3 age:sexpop2 age:sexpop3 -0.9441316 -3.173625 -2.460935 0.9715633 0.5976373 Degrees of Freedom: 36 Total; 24 Residual Residual Deviance: 32.13897 > glm <- glm(p ~ age + pop + age:pop, family = binomial, weight = tot) > summary(glm) Coefficients: Value Std. Error t value (Intercept) -3.2263077 0.3566781 -9.0454336 age 1.0069415 0.1173754 8.5788085 pop2 0.2100080 0.6515996 0.3222961 pop3 1.4707751 0.5989436 2.4556155 agepop2 -0.0997709 0.2142048 -0.4657733 agepop3 -0.6480929 0.1761453 -3.6793082 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 40.46303 on 30 degrees of freedom Number of Fisher Scoring Iterations: 4 > glm <- glm(p ~ pop - 1 + age:pop, family = binomial, weight = tot) > summary(glm) Coefficients: Value Std. Error t value pop1 -3.2263077 0.3566781 -9.045434 pop2 -3.0162997 0.5453099 -5.531350 pop3 -1.7555325 0.4811592 -3.648548 pop1age 1.0069415 0.1173754 8.578808 pop2age 0.9071706 0.1791835 5.062803 pop3age 0.3588485 0.1313399 2.732212 (Dispersion Parameter for Binomial family taken to be 1 ) Residual Deviance: 40.46303 on 30 degrees of freedom Number of Fisher Scoring Iterations: 4 > plot(age, p, yaxs = "s", type = "n", main = "Figure 7.10 Eskimo") > points(age[pop == 1], p[pop == 1], pch = "1") > points(age[pop == 2], p[pop == 2], pch = "2") > points(age[pop == 3], p[pop == 3], pch = "3") > lines(age[1:6], fitted(glm)[1:6], lty = 2) > lines(age[13:18], fitted(glm)[13:18], lty = 3) > lines(age[25:30], fitted(glm)[25:30], lty = 4)