> #------------------------------------------------------------------- > # 9.1 CHOOSING THE FAMILY > #------------------------------------------------------------------- > > snow<-c(4.1, 12.2, 0, 14.6, 2.5, 2.1, 4.1, 11.5, 0.7, 0, + 23.3, 6.3, 0, 0, 0, 3.1, 2.6, 1.0, 0.3, 0, + 0, 0, 2.3, 11.3, 6.5, 0, 4.8, 1.1, 0, 17.4, + 0, 10.9, 1.3, 0, 0.8, 0, 1.0, 18.4, 0, 0, + 0, 3.8, 0, 7.0, 31.0, 0, 6.6, 0, 16.9, 0, + 0.3, 10.1, 0, 5.0, 1.0) > year<-6:60 > par(mfrow=c(2,2)) > plot(year,snow,yaxs="i",ylim=c(0,max(snow)),main="Figure 9.1 Snow") > > # poissonexp<-list( > # family=c(name="poissonexp",link="log",variance="mu^(3/2)"), > # names="log", > # link=function(mu) log(mu), > # inverse=function(eta) care.exp(eta), > # deriv=function(mu) 1/mu, > # initialize=expression({y <- as.numeric(y) > # mu <- y+0.167*(y==0)}), > # variance=function(mu) mu^(3/2), > # deviance=function(mu, y, w, residuals = F) > # if(residuals) sqrt(w)*(sqrt(y)-sqrt(mu))*2/sqrt(sqrt(mu)) else > # sum(w*(sqrt(y)-sqrt(mu))^2*4/sqrt(mu)), > # weight=expression(w/( (sqrt(family$variance(mu)) * family$deriv(mu))^2) ) ) > # class(poissonexp)<-"family" > > print(poisson) function (link = "log") { linktemp <- substitute(link) if (!is.character(linktemp)) { linktemp <- deparse(linktemp) if (linktemp == "link") linktemp <- eval(link) } if (any(linktemp == c("log", "identity", "sqrt"))) stats <- make.link(linktemp) else stop(paste(linktemp, "link not available for poisson", "family; available links are", "\"identity\", \"log\" and \"sqrt\"")) variance <- function(mu) mu validmu <- function(mu) all(mu > 0) dev.resids <- function(y, mu, wt) 2 * wt * (y * log(ifelse(y == 0, 1, y/mu)) - (y - mu)) aic <- function(y, n, mu, wt, dev) 2 * sum((mu - y * log(mu) + lgamma(y + 1)) * wt) initialize <- expression({ if (any(y < 0)) stop(paste("Negative values not allowed for", "the Poisson family")) n <- rep(1, nobs) mustart <- y + 0.1 }) structure(list(family = "poisson", link = linktemp, linkfun = stats$linkfun, linkinv = stats$linkinv, variance = variance, dev.resids = dev.resids, aic = aic, mu.eta = stats$mu.eta, initialize = initialize, validmu = validmu, valideta = stats$valideta), class = "family") } > > poissonexp <- function (link = "log") + { + linktemp <- substitute(link) + if (!is.character(linktemp)) { + linktemp <- deparse(linktemp) + if (linktemp == "link") + linktemp <- eval(link) + } + if (any(linktemp == c("log", "identity", "sqrt"))) + stats <- make.link(linktemp) + else stop(paste(linktemp, "link not available for poisson", + "family; available links are", "\"identity\", \"log\" and \"sqrt\"")) + variance <- function(mu) mu^(3/2) + validmu <- function(mu) all(mu > 0) + dev.resids <- function(y, mu, wt) + wt*(sqrt(y)-sqrt(mu))^2*4/sqrt(mu) + aic <- function(y, n, mu, wt, dev) { + NaN + } + initialize <- expression({ + if (any(y < 0)) stop(paste("Negative values not allowed for", + "the Poissonexp family")) + n <- rep(1, nobs) + mustart <- y + 0.1 + }) + structure(list(family = "poissonexp", link = linktemp, linkfun = stats$linkfun, + linkinv = stats$linkinv, variance = variance, dev.resids = dev.resids, + aic = aic, mu.eta = stats$mu.eta, initialize = initialize, + validmu = validmu, valideta = stats$valideta), class = "family") + } > > summary(glm(snow~1,family=poissonexp)) Call: glm(formula = snow ~ 1, family = poissonexp) Deviance Residuals: Min 1Q Median 3Q Max -2.9082 -2.9082 -1.5328 0.5712 4.7497 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.4976 0.2035 7.358 1.08e-09 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poissonexp family taken to be 4.830136) Null deviance: 289.17 on 54 degrees of freedom Residual deviance: 289.17 on 54 degrees of freedom AIC: NaN Number of Fisher Scoring iterations: 3 > summary(glm(snow~year,family=poissonexp)) Call: glm(formula = snow ~ year, family = poissonexp) Deviance Residuals: Min 1Q Median 3Q Max -2.9098 -2.9075 -1.5304 0.5699 4.7534 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.500e+00 4.739e-01 3.166 0.00256 ** year -8.373e-05 1.294e-02 -0.006 0.99486 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poissonexp family taken to be 4.921798) Null deviance: 289.17 on 54 degrees of freedom Residual deviance: 289.17 on 53 degrees of freedom AIC: NaN Number of Fisher Scoring iterations: 3 > theta<-2*pi*year/11 > sine<-sin(theta) > cosine<-cos(theta) > glm<-glm(snow~sine+cosine,family=poissonexp) > summary(glm) Call: glm(formula = snow ~ sine + cosine, family = poissonexp) Deviance Residuals: Min 1Q Median 3Q Max -3.1014 -2.8034 -1.5223 0.3181 4.0906 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.4769 0.2033 7.265 1.87e-09 *** sine -0.1617 0.2866 -0.564 0.575 cosine -0.2376 0.2869 -0.828 0.411 --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 (Dispersion parameter for poissonexp family taken to be 4.74355) Null deviance: 289.17 on 54 degrees of freedom Residual deviance: 284.37 on 52 degrees of freedom AIC: NaN Number of Fisher Scoring iterations: 3 > lines(year,fitted(glm)) > > #------------------------------------------------------------------- > # 9.2 CHOOSING THE LINK > #------------------------------------------------------------------- > > m<-matrix(scan("/glim/datasets/trees"), ncol=3,byrow=T) Read 93 items > diam<-m[,1] > height<-m[,2] > vol<-m[,3] > glm(vol~diam+height ) Call: glm(formula = vol ~ diam + height) Coefficients: (Intercept) diam height -57.9877 4.7082 0.3393 Degrees of Freedom: 30 Total (i.e. Null); 28 Residual Null Deviance: 8106 Residual Deviance: 421.9 AIC: 176.9 > 4.708161/ 0.3392512 [1] 13.87810 > glm(vol~diam+height,family=quasi(variance=constant,link=log) ) Call: glm(formula = vol ~ diam + height, family = quasi(variance = constant, link = log)) Coefficients: (Intercept) diam height 0.67942 0.13416 0.01114 Degrees of Freedom: 30 Total (i.e. Null); 28 Residual Null Deviance: 8106 Residual Deviance: 272.6 AIC: NA > 0.1341617 /0.0111431 [1] 12.03989 > glm(vol~diam+height,family=quasi(variance=constant,link=sqrt) ) Call: glm(formula = vol ~ diam + height, family = quasi(variance = constant, link = sqrt)) Coefficients: (Intercept) diam height -3.10926 0.41064 0.03913 Degrees of Freedom: 30 Total (i.e. Null); 28 Residual Null Deviance: 8106 Residual Deviance: 185.7 AIC: NA > 0.4106364 /0.03913296 [1] 10.49336 > > vol [1] 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 24.2 21.0 21.4 21.3 19.1 [16] 22.2 33.8 27.4 25.7 24.9 34.5 31.7 36.3 38.3 42.6 55.4 55.7 58.3 51.5 51.0 [31] 77.0 > fvol<-floor(vol/5)-1 > fvol [1] 1 1 1 2 2 2 2 2 3 2 3 3 3 3 2 3 5 4 4 3 5 5 6 6 7 10 [27] 10 10 9 9 14 > plot(height,diam,type="n",main="Figure 9.2 Trees") > for(i in 1:14) + text(height[fvol==i],diam[fvol==i],i) > > Fvol<-factor(fvol) > glm<-glm(vol~diam+height+Fvol) > glm Call: glm(formula = vol ~ diam + height + Fvol) Coefficients: (Intercept) diam height Fvol2 Fvol3 Fvol4 -14.5198 1.4962 0.1814 2.6088 5.7454 6.6353 Fvol5 Fvol6 Fvol7 Fvol9 Fvol10 Fvol14 12.6660 15.7644 18.7680 24.3304 30.0141 44.9209 Degrees of Freedom: 30 Total (i.e. Null); 19 Residual Null Deviance: 8106 Residual Deviance: 24.49 AIC: 106.7 > cov<-summary(glm)$cov.unscaled[2:3,2:3] > cov diam height diam 0.14294736 0.010633641 height 0.01063364 0.002602849 > evec<-eigen(cov) > evec $values [1] 0.143748475 0.001801731 $vectors height diam diam -0.99717411 0.07512518 height -0.07512518 -0.99717411 > -0.99717411 /-0.07512518 [1] 13.2735 > eta<-13.2735*diam+height > plot(vol,eta,main="Figure 9.3 Estimated link function") > >