The contents of this file are copyright of Cambridge University Press, 1997. ========== Chapter 2 ========== Practical 2.1 (City data) ======================== m1 <- mean(log(city$u)); m2 <- mean(log(city$x)) s1 <- sqrt(var(log(city$u))); s2 <- sqrt(var(log(city$x))) rho <- cor(log(city))[1,2] city.mle <- c(m1, m2, s1, s2, rho ) city.sim <- function( city, mle ) { n <- nrow(city); z1 <- rnorm(n); z2 <- rnorm(n) z2 <- mle[5]*z1+sqrt(1-mle[5]^2)*z2; data.frame( u=exp(mle[1]+mle[3]*z1), x=exp(mle[2]+mle[4]*z2) ) } city.fun <- function( data, i=1:nrow(data) ) { d <- data[i,] tstar <- sum(d$x)/sum(d$u) ubar <- mean(d$u); c( tstar, sum( (d$x-tstar*d$u)^2/(nrow(d)*ubar)^2 ) ) } city.para <- boot(city,city.fun,R=999,sim="parametric",ran.gen=city.sim, mle=city.mle) tstar <- city.para$t[,1] zstar <- (tstar-city.para$t0[1])/sqrt(city.para$t[,2]) split.screen(c(1,2)) screen(1); hist(tstar) screen(2); hist(zstar) screen(1); qqnorm(tstar,pch=".") screen(2); qqnorm(zstar,pch="."); abline(0,1,lty=2) city.para$t0[1] - sort(tstar-city.para$t0[1])[c(975,25)] city.para$t0[1] - sqrt(city.para$t0[2])*sort(zstar)[c(975,25)] Practical 2.2 (Carbon monoxide transfer data) =========================================== attach(co.transfer) plot(0.5*(entry+week),week-entry) t.test(week-entry) co.fun <- function( data, i ) { d <- data[i,] y <- d$week-d$entry; c( mean(y), var(y)/nrow(d) ) } co.boot <- boot( co.transfer, co.fun, R=999) split.screen(c(1,2)) screen(1); split.screen(c(2,1)); screen(3); qqnorm( co.boot$t[,1],ylab="t*",pch=".") abline(co.boot$t0[1],sqrt(co.boot$t0[2]),lty=2) screen(2); plot(co.boot$t[,1],sqrt(co.boot$t[,2]),xlab="t*",ylab="SE*") screen(4); z <- (co.boot$t[,1]-co.boot$t0[1])/sqrt(co.boot$t[,2]); qqnorm(z); abline(0,1,lty=2) Practical 2.3 (CD4 data) ====================== corr.fun <- function(d, w = rep(1, nrow(d))/nrow(d)) { w <- w/sum(w) n <- nrow(d) m1 <- sum(d[, 1] * w) m2 <- sum(d[, 2] * w) v1 <- sum(d[, 1]^2 * w) - m1^2 v2 <- sum(d[, 2]^2 * w) - m2^2 rho <- (sum(d[, 1] * d[, 2] * w) - m1 * m2)/sqrt(v1 * v2) i <- rep(1:n,round(n*w)) us <- (d[i, 1] - m1)/sqrt(v1) xs <- (d[i, 2] - m2)/sqrt(v2) L <- us * xs - 0.5 * rho * (us^2 + xs^2) c(rho, sum(L^2)/n^2) } cd4.boot <- boot( cd4, corr.fun, R=999, stype="w",keepf=T) t0 <- cd4.boot$t0[1]; tstar <- cd4.boot$t[,1]; vL <- cd4.boot$t[,2] zstar <- (tstar-t0)/sqrt(vL); fisher <- function( r ) 0.5*log( (1+r)/(1-r) ) split.screen(c(1,2)) screen(1); plot(tstar,vL) screen(2); plot(fisher(tstar),vL/(1-tstar^2)^2) zstar <- (fisher(tstar)-fisher(t0))/sqrt(vL/(1-tstar^2)^2) v0 <- cd4.boot$t0[2]/(1-t0^2)^2; fisher(t0) - sqrt(v0)*sort(zstar)[c(975,25)] Practical 2.4 (Quantile simulations) ================================== split.screen(c(4,4)) quantiles <- matrix(NA,16,4) n <- c(39,99,399,999) p <- c(0.025,0.05,0.95,0.975) for (i in 1:4) { y <- rnorm(999) for (j in 1:4) { quantiles[(j-1)*4+i,] <- quantile(y[1:n[j]], probs=p) screen((i-1)*4+j) qqnorm(y[1:n[j]],ylab="y",main=paste("R = ",n[j])) abline(h=quantile(y[1:n[j]],p),lty=2) } } Practical 2.5 (Jackknives) ======================== L.inf <- empinf(data=cd4,statistic=corr.fun) L.jack <- empinf(data=cd4,statistic=corr.fun,type="jack") L.reg <- empinf(boot.out=cd4.boot,type="reg") split.screen(c(1,2)) screen(1); plot(L.inf,L.jack); screen(2); plot(L.inf,L.reg) v.inf <- sum(L.inf^2)/nrow(cd4)^2 v.jack <- var(L.jack)/nrow(cd4) v.reg <- sum(L.reg^2)/nrow(cd4)^2 v.boot <- var(cd4.boot$t[,1]) c(v.inf,v.reg,v.jack,v.boot) plot(tstar,linear.approx(cd4.boot,L.reg)) ========== Chapter 3 ========== Practical 3.1 (Gravity data) =========================== grav.fun <- function( data, i ) { d <- data[i,]; m <- tapply(d$g,d$series,mean); v <- tapply(d$g,d$series,var); n <- table(d$series); c( sum(m*n/v)/sum(n/v), 1/sum(n/v) ) } grav.boot <- boot( gravity, grav.fun, R=200, strata=gravity$series) attach(gravity); n <- table(series) m <- rep( tapply(g, series, mean), n ); s <- rep(sqrt(tapply(g,series,var)),n); res <- (g-m)/s qqnorm(res); abline(0,1,lty=2) grav <- data.frame( m, s, series, res ) grav.fun <- function( data, i ) { e <- data$res[i]; y <- data$m + data$s*e; m <- tapply(y, data$series, mean); v <- tapply(y, data$series, var ); n <- table( data$series); c( sum( m*n/v)/sum( n/v), 1/sum( n/v) ) } grav1.boot <- boot( grav, grav.fun, R=200) Practical 3.2 (Channing data) =========================== chan <- channing[1:97,] # men only chan$age <- chan$entry+chan$time attach(chan) chan.F <- survfit(Surv(age, cens)) chan.F max(chan.F$surv[chan.F$time>900]) max(chan.F$surv[chan.F$time>1020]) chan.G <- survfit(Surv(age-0.01*cens,1-cens)) split.screen(c(2,1)) screen(1); plot(chan.F,xlim=c(760,1200),main="survival") screen(2); plot(chan.G,xlim=c(760,1200),main="censoring") chan.fun <- function(data) { s <- survfit(Surv(age,cens),data=data) c( max(s$surv[s$time>900]), max(s$surv[s$time>1020]), min(s$time[(s$surv<=0.75)]), min(s$time[(s$surv<=0.5)]) ) } chan.boot1 <- censboot( chan, chan.fun, R=99, sim = "ordinary") chan.boot2 <- censboot( chan, chan.fun, R=99, F.surv=chan.F, G.surv=chan.G, sim = "cond",index=c(6,5)) chan.boot3 <- censboot( chan, chan.fun, R=99, F.surv=chan.F, sim = "weird",index=c(6,5)) Practical 3.3 (Censoring simulation) =================================== exp.fun <- function(d) { d.s <- survfit(Surv(y, cens),data=d) prob <- min(d.s$surv[d.s$time<1]) med <- min(d.s$time[(1-d.s$surv)>=0.5]) c( sum(d$y)/sum(d$cens), sum(1-d$cens) ) } results <- NULL; nreps <- 100; n <- 50; R <- 25; cens <- 3*runif(n); for (i in 1:nreps) { y0 <- rexp(n); junk <- data.frame( y = pmin(y0,cens), cens = as.numeric(y0darwin.rand$t0 ) )/(1+darwin.rand$R) darwin.f <- function( d ) { n <- length(d); m <- 2; t1 <- mean( d ); v1 <- sum( (d-t1)^2 )/n^2; d <- sort(d)[(m+1):(n-m)] t2 <- mean( d ); v2 <- ( (sum((d-t2)^2) + m*(min(d)-t2)^2 + m*(max(d)-t2)^2 ) )/(n*(n-2*m)) c( t1, v1, t2, v2 ) } darwin.fun <- function( data, i ) { sign <- sample(c(-1,1),size=length(data),replace=T) d <- sign*data darwin.f( d ) } darwin.ad <- boot( darwin, darwin.f, R=999, sim="parametric", ran.gen=darwin.gen, mle=nrow(darwin) ) darwin.ad$t0 i <- c(1:999)[darwin.ad$t[,2]>darwin.ad$t[,4]] (1+sum( darwin.ad$t[i,3]>darwin.ad$t0[3] ) )/(1+length(i)) Practical 4.5 (Paulsen data) =========================== h <- 1.5 hist(paulsen$y,probability=T,breaks=c(0:30)) lines(density(paulsen$y,width=4*h,from=0,to=30)) peak.test <- function( y, h ) { dens <- density(y,width=4*h,n=100) sum( peaks( dens$y[(dens$x>=0)&(dens$x<=20)] )) } peak.test(paulsen$y, h ) peak.gen <- function( d, mle) { n <- mle[1]; h <- mle[2] i <- sample(n,n,replace=T) d[i]+h*rnorm(n) } paulsen.boot <- boot( paulsen$y, peak.test, R=999, sim="parametric", ran.gen=peak.gen, mle=c(nrow(paulsen),1.87), h=1.87) shrunk.gen <- function( d, mle) { n <- mle[1]; h <- mle[2] v <- var(d) (d[sample(n,n,replace=T)]+h*rnorm(n))/sqrt(1+h^2/v) } paulsen.boot <- boot( paulsen$y, peak.test, R=999, sim="parametric", ran.gen=shrunk.gen, mle=c(nrow(paulsen),1.87), h=1.87) ========== Chapter 5 ========== Practical 5.1 (CD4 data) ======================== cd4.boot <- boot( cd4, corr.fun, stype="w", R=999 ) boot.ci(cd4.boot,conf=0.9) fisher <- function( r ) 0.5*log( (1+r)/(1-r) ) fisher.dot <- function( r ) 1/(1-r^2) fisher.inv <- function( z ) ( exp(2*z)-1 )/( exp(2*z)+1 ) boot.ci(cd4.boot,h=fisher,hdot=fisher.dot,hinv=fisher.inv,conf=0.9) cd4.rg <- function( data, mle ) { d <- matrix( rnorm(2*nrow(data)), nrow(data), 2 ) d[,2] <- mle[5]*d[,1]+sqrt(1-mle[5]^2)*d[,2]; d[,1] <- mle[1]+mle[3]*d[,1]; d[,2] <- mle[2]+mle[4]*d[,2]; d } n <- nrow(cd4) cd4.mle <- c( apply(cd4,2,mean),sqrt(apply(cd4,2,var)*(n-1)/n),corr(cd4) ) cd4.para <- boot( cd4, corr.fun, R=999, sim="parametric", ran.gen = cd4.rg, mle=cd4.mle ) boot.ci(cd4.para,type=c("norm","basic","stud","perc"),conf=0.9) boot.ci(cd4.para,h=fisher,hdot=fisher.dot,hinv=fisher.inv, type=c("norm","basic","stud","perc"),conf=0.9) abc.ci( cd4, corr, conf=0.9 ) Practical 5.2 (Eigenvalue) ======================== eigen.fun <- function(d, w = rep(1, nrow(d))/nrow(d)) { w <- w/sum(w) n <- nrow(d) m <- crossprod( w, d ) m2 <- sweep(d,2,m) v <- crossprod( diag(sqrt(w)) %*% m2 ) eig <- eigen(v,symmetric=T) stat <- eig$values[1]; e <- eig$vectors[,1] i <- rep(1:n,round(n*w)) ds <- sweep(d[i,],2,m) L <- (ds%*%e)^2 - stat c(stat, sum(L^2)/n^2) } cd4.boot <- boot(cd4,eigen.fun,R=999,stype="w",keepf=T) boot.ci( cd4.boot, conf=0.90 ) abc.ci( cd4, eigen.fun, conf=0.9) Practical 5.3 (Amis data) ======================== amis1 <- amis[(amis$pair!=4)&(amis$pair!=6)&(amis$period!=3),] tapply(amis1$speed, list(amis1$period, amis1$warning, amis1$pair),quantile, 0.85) amis.fun <- function(data, i) { d <- data[i, ] d <- tapply(d$speed, list(d$period, d$warning, d$pair),quantile, 0.85) mean( (d[2,1, ] - d[1,1, ]) - (d[2,2, ] - d[1,2, ]) ) } str <- 4*(amis1$pair-1)+2*(amis$warning-1)+amis1$period unix.time( amis1.boot <- boot(amis1,amis.fun,R=99,strata=str) ) amis1.boot$t0 qqnorm(amis1.boot$t) abline(mean(amis1.boot$t),sqrt(var(amis1.boot$t)),lty=2) boot.ci(amis1.boot,type=c("basic","perc","norm"),conf=0.9) Practical 5.4 (Capability data) =============================== attach(capability) par(mfrow=c(2,2)) tsplot(y, ylim=c(5,6)) abline(h=5.79,lty=2); abline(h=5.49,lty=2) qqnorm(y) acf(y) acf(y,type="partial") capability.fun <- function( data, i, U=5.79, L=5.49, dk=2.236 ) { y <- data$y[i]; m <- mean(y); r5 <- apply(matrix(y,15,5), 1, function(y) diff(range(y)) ) s <- mean(r5)/dk; 2*min( (U-m)/s, (m-L)/s ) } capability.boot <- boot( capability, capability.fun, R=999 ) boot.ci(capability.boot,type=c("norm","basic","perc")) Practical 5.5 (CD4 data again) =============================== nested.corr <- function(data, w, t0, M ) { n <- nrow(data) i <- rep(1:n,round(n*w)) t <- corr.fun( data, w ) z <- (t[1]-t0)/sqrt(t[2]) nested.boot <- boot( data[i,], corr.fun, R=M, stype="w" ) z.nested <- (nested.boot$t[,1]-t[1])/sqrt(nested.boot$t[,2]) c( z, sum(z.nestedpoison.boot$t0) poison.gen1 <- function(data,mle) { i <- matrix(1:48,4,12,byrow=T) i <- apply(i,1,sample,replace=T,size=4) data$time <- mle$fit + mle$res[i] data } poison.boot <- boot( poisons, poison.fun, R=199, sim="parametric", ran.gen=poison.gen1, mle=poison.mle ) sum(poison.boot$t>poison.boot$t0) Practical 6.4 (Nuclear data) ======================== nuclear.glm <- glm(log(cost)~date+log(t1)+log(t2)+log(cap)+pr+ne +ct+bw+log(cum.n)+pt,data=nuclear) nuclear.diag <- glm.diag(nuclear.glm) nuke <- data.frame(nuclear,fit=fitted(nuclear.glm), res=nuclear.diag$res*nuclear.diag$sd) nuke.p <- nuke[32,] nuke.p$date <- 73 nuke.p$fit <- predict(nuclear.glm,nuke.p) nuke.pred <- function(data,i,i.p,d.p) { d <- data d$cost <- exp(d$fit+d$res[i]) d.glm <- glm(log(cost)~date+log(t1)+log(t2)+log(cap)+pr+ne +ct+bw+log(cum.n)+pt,data=d) predict(d.glm,d.p)-(d.p$fit+d$res[i.p]) } nuclear.boot.pred <- boot(nuke,nuke.pred,R=199,m=1,d.p=nuke.p) exp(nuke.p$fit-quantile(nuclear.boot.pred$t,c(0.025,0.975))) Practical 6.5 (Mammals data) ======================== cost <- function( y, mu=0 ) mean( (y-mu)^2 ) mammals.glm <- glm(log(brain)~log(body),data=mammals) muhat <- fitted(mammals.glm) app.err <- cost( mammals.glm$y, muhat ) mammals.diag <- glm.diag( mammals.glm ) cv.err <- mean( (mammals.glm$y-muhat)^2/(1-mammals.diag$h)^2 ) cv.err.6 <- cv.glm( mammals, mammals.glm, cost, K=6 ) mammals.pred.fun <- function( data, i, formula ) { d <- data[i,] d.glm <- glm(formula,data=d) D.F.hatF <- cost( log(data$brain), predict(d.glm,data) ) D.hatF.hatF <- cost( log(d$brain), fitted(d.glm) ) c( log(data$brain)-predict(d.glm,data), D.F.hatF - D.hatF.hatF ) } mam.boot <- boot( mammals, mammals.pred.fun, R=200, formula=formula(mammals.glm)) n <- nrow(mammals) err.boot <- app.err + mean(mam.boot$t[,n+1]) err.632 <- 0 mam.boot$f <- boot.array(mam.boot) for (i in 1:n) err.632 <- err.632 + cost( mam.boot$t[mam.boot$f[,i]==0,i])/n err.632 <- 0.368*app.err + 0.632*err.632 ord <- order(mammals.diag$res) mam.pred <- mam.boot$t[,ord] mam.fac <- factor(rep(1:n,rep(200,n)),labels=ord) plot(mam.fac, mam.pred,ylab="Prediction errors", xlab="Case ordered by residual") Practical 6.6 (Salinity data) ======================== salinity.rob.fun <- function(data,i) { data.i <- data[i,] ls.fit <- lm(sal~lag+trend+dis, data=data.i) l1.fit <- l1fit(data.i[,-1],data.i[,1]) lts.fit <- ltsreg(data.i[,-1],data.i[,1]) c(ls.fit$coef,l1.fit$coef,lts.fit$coef) } salinity.boot <- boot(salinity,salinity.rob.fun,R=1000) jack.after.boot(salinity.boot,index=4) jack.after.boot(salinity.boot,index=8) jack.after.boot(salinity.boot,index=12) salinity.quad.fun <- function(data, i) { data.i <- data[i, ] ls.fit <- lm(sal ~ lag+trend+poly(dis,2), data=data.i) ls.sum <- summary(ls.fit) ls.std <- sqrt(diag(ls.sum$cov))*ls.sum$sigma c(ls.fit$coef, ls.std) } quad.z <- salinity.boot$t0[5]/salinity.quad$t0[5] salinity.boot <- boot(salinity, salinity.quad.fun, R=99) quad.z.star <- (salinity.boot$t[,5]-salinity.boot$t0[5])/salinity.boot$t[,10] (1+sum(quad.z0.5 ) nodal.glm <- glm(r~stage+xray+acid,binomial,data=nodal) nodal.diag <- glm.diag( nodal.glm ) app.err <- cost( r, fitted(nodal.glm) ) cv.err <- cv.glm( nodal, nodal.glm, cost, K=53 )$delta cv.11.err <- cv.glm( nodal, nodal.glm, cost, K=11 )$delta nodal.pred.fun <- function( data, i, model ) { d <- data[i,] d.glm <- update(model,data=d) pred <- predict(d.glm,data,type="response") D.F.Fhat <- cost( data$r, pred ) D.Fhat.Fhat <- cost( d$r, fitted(d.glm) ) c( data$r-pred, D.F.Fhat - D.Fhat.Fhat ) } nodal.boot <- boot(nodal, nodal.pred.fun, R=200, model=nodal.glm) nodal.boot$f <- boot.array(nodal.boot) n <- nrow(nodal) err.boot <- mean( nodal.boot$t[,n+1] ) + app.err ord <- order(nodal.diag$res) nodal.pred <- nodal.boot$t[,ord] err.632 <- 0 n.632 <- NULL pred.632 <- NULL for (i in 1:n) { inds <- nodal.boot$f[,i]==0 err.632 <- err.632 + cost( nodal.pred[inds,i])/n n.632 <- c( n.632, sum( inds ) ) pred.632 <- c( pred.632, nodal.pred[inds,i] ) } err.632 <- 0.368*app.err + 0.632*err.632 nodal.fac <- factor(rep(1:n,n.632),labels=ord) plot(nodal.fac, pred.632,ylab="Prediction errors", xlab="Case ordered by residual") abline(h=-0.5,lty=2); abline(h=0.5,lty=2) Practical 7.5 (Cloth data) ======================== plot(cloth$x,cloth$y) cloth.glm <- glm(y~offset(log(x)),poisson,data=cloth) lines(cloth$x,fitted(cloth.glm)) summary(cloth.glm) cloth.diag <- glm.diag(cloth.glm) cloth.gam <- gam(y~s(log(x)),poisson,data=cloth) lines(cloth$x,fitted(cloth.gam),lty=2) summary(cloth.gam) cloth.gen <- function( data, fits ) { y <- rpois(n=nrow(data),fits) data.frame(x=data$x,y=y) } cloth.fun <- function( data ) { d.glm <- glm(y~offset(log(x)),poisson,data=data) d.gam <- gam(y~s(log(x)),poisson,data=data) c(deviance(d.glm),deviance(d.gam)) } cloth.boot <- boot( cloth, cloth.fun, sim="parametric", R=99, ran.gen=cloth.gen, mle=fitted(cloth.glm)) cloth1 <- data.frame(cloth,fits=fitted(cloth.glm),pearson=cloth.diag$rp) cloth.fun1 <- function( data, i ) { y <- data$fits+sqrt(data$fits)*data$pearson[i] y <- round(y) y[y<0] <- 0 d.glm <- glm(y~offset(log(data$x)),poisson) d.gam <- gam(y~s(log(data$x)),poisson) c(deviance(d.glm),deviance(d.gam)) } cloth.boot <- boot( cloth1, cloth.fun1, R=99) Practical 7.6 (Nitrofen data) ======================== nitro <- rbind(nitrofen,nitrofen,nitrofen,nitrofen,nitrofen) nitro <- rbind(nitro,nitro,nitro,nitro,nitro) nitro$conc <- seq(0,310,length=nrow(nitro)) attach(nitrofen) plot(conc,jitter(total),ylab="total") nitro.glm <- glm(total~conc+conc^2,poisson,data=nitrofen) lines(nitro$conc,predict(nitro.glm,nitro,"response"),lty=3) nitro.gam <- gam(total~s(conc,df=3),robust(poisson),data=nitrofen) lines(nitro$conc,predict(nitro.gam,nitro,"response")) nitro.fun <- function( data, i, nitro ) { assign("d" ,data[i,],frame=1) d.fit <- gam(total~s(conc,df=3),robust(poisson),data=d) f <- predict(d.fit,nitro,"response") f.gam <- max(nitro$conc[f>0.5*f[1]]) d.fit <- glm(total~conc+conc^2,poisson,data=d) f <- predict(d.fit,nitro,"response") f.glm <- max(nitro$conc[f>0.5*f[1]]) c(f.gam, f.glm) } nitro.boot <- boot( nitrofen, nitro.fun, R=499, strata=rep(1:5,rep(10,5)), nitro=nitro) boot.ci(nitro.boot,index=1,type=c("norm","basic","perc","bca")) boot.ci(nitro.boot,index=2,type=c("norm","basic","perc","bca")) nitro.test <- fitted(gam(total~s(conc,df=2.2),robust(poisson), data=nitrofen)) f <- predict(nitro.glm,nitro,"response") nitro.orig <- max(f) - f[1] res <- (nitrofen$total-nitro.test)/sqrt(1-0.1) nitro1 <- data.frame(nitrofen,res=res,fit=nitro.test) nitro1.fun <- function( data, i, nitro ) { assign("d" ,data[i,],frame=1) d$total <- round(d$fit+d$res[i]) d.fit <- glm(total~conc+conc^2,poisson,data=d) f <- predict(d.fit,nitro,"response") max(f)-f[1] } nitro1.boot <- boot( nitro1, nitro1.fun, R=99, strata=rep(1:5,rep(10,5)), nitro=nitro) (1+sum(nitro1.boot$t>nitro.orig))/(1+nitro1.boot$R) ========== Chapter 8 ========== Practical 8.1 (Lynx data) ======================== ts.plot(log(lynx)) lynx.ar <- ar(log(lynx)) lynx.ar$order lynx.fun <- function(tsb) { ar.fit <- ar(tsb, order.max=25) c(ar.fit$order, mean(tsb), tsb) } lynx.1 <- tsboot(log(lynx), lynx.fun, R=99, l=20, sim="fixed") tsplot(ts(lynx.1$t[1,3:116],start=c(1821,1)), main="Block simulation, l=20") boot.array(lynx.1)[1,] table(lynx.1$t[,1]) var(lynx.1$t[,2]) qqnorm(lynx.1$t[,2]); abline(mean(lynx.1$t[,2]),sqrt(var(lynx.1$t[,2])),lty=2) .Random.seed <- lynx.1$seed lynx.2 <- tsboot(log(lynx), lynx.fun, R=99, l=20, sim="geom") lynx.model <- list(order=c(lynx.ar$order,0,0),ar=lynx.ar$ar) lynx.res <- lynx.ar$resid[!is.na(lynx.ar$resid)] lynx.res <- lynx.res - mean(lynx.res) lynx.sim <- function(res,n.sim, ran.args) { rg1 <- function(n, res) sample(res, n, replace=T) ts.orig <- ran.args$ts ts.mod <- ran.args$model mean(ts.orig)+ts(arima.sim(model=ts.mod, n=n.sim, rand.gen=rg1, res=as.vector(res))) } .Random.seed <- lynx.1$seed lynx.3 <- tsboot(lynx.res, lynx.fun, R=99, sim="model", n.sim=114,ran.gen=lynx.sim, ran.args=list(ts=log(lynx), model=lynx.model)) lynx.black <- function(res, n.sim, ran.args) { ts.orig <- ran.args$ts ts.mod <- ran.args$model mean(ts.orig) + ts(arima.sim(model=ts.mod,n=n.sim,innov=res)) } .Random.seed <- lynx.1$seed lynx.1b <- tsboot(lynx.res, lynx.fun, R=99, l=20, sim="fixed", n.sim=114,ran.gen=lynx.black, ran.args=list(ts=log(lynx), model=lynx.model)) Practical 8.2 (Beaver data) ======================== fit <- function( data ) { X <- cbind(rep(1,100),data$activ) para <- list( X=X,data=data) assign("para",para,frame=1) d <- arima.mle(x=para$data$temp,model=list(ar=c(0.8)), xreg=para$X) res <- arima.diag(d,plot=F,std.resid=T)$std.resid res <- res[!is.na(res)] list(paras=c(d$model$ar,d$reg.coef,sqrt(d$sigma2)), res=res-mean(res),fit=X %*% d$reg.coef) } beaver.args <- fit( beaver ) white.noise <- function( n.sim, ts) sample(ts,size=n.sim,replace=T) beaver.gen <- function( ts, n.sim, ran.args ) { tsb <- ran.args$res fit <- ran.args$fit coeff <- ran.args$paras ts$temp <- fit + coeff[4]*arima.sim( model=list(ar=coeff[1]), n=n.sim,rand.gen=white.noise,ts=tsb ) ts } new.beaver <- beaver.gen( beaver, 100, beaver.args ) beaver.fun <- function(ts) fit(ts)$paras beaver.boot <- tsboot( beaver, beaver.fun, R=99,sim="model", n.sim=100,ran.gen=beaver.gen,ran.args=beaver.args) names(beaver.boot) beaver.boot$t0 beaver.boot$t[1:10,] Practical 8.2 (Sunspot data) ======================== sunspot.fun <- function( ts ) ts sunspot.1 <- tsboot(sunspot,sunspot.fun,R=2,sim="scramble") .Random.seed <- sunspot.1$seed sunspot.2 <- tsboot(sunspot,sunspot.fun,R=2,sim="scramble",norm=F) split.screen(c(3,2)) yl <- c(-50,200) screen(1); ts.plot(sunspot,ylim=yl); abline(h=0,lty=2) screen(3); tsplot(sunspot.1$t[1,],ylim=yl); abline(h=0,lty=2) screen(4); tsplot(sunspot.1$t[2,],ylim=yl); abline(h=0,lty=2) screen(5); tsplot(sunspot.2$t[1,],ylim=yl); abline(h=0,lty=2) screen(6); tsplot(sunspot.2$t[2,],ylim=yl); abline(h=0,lty=2) Practical 8.3 (Coal data) ======================== coal.est <- function( y, h=5) length(y)*ksmooth(y,bandwidth=2.7*h, kernel="n",x.points=seq(1851,1963,2))$y year <- seq(1851,1963,2) plot(year,coal.est(coal$date),type="l",ylab="intensity", ylim=c(0,6)) rug(coal) coal.fun <- function( data, i, h=5 ) coal.est(data[i], h ) unix.time( coal.boot <- boot( coal$date, coal.fun, R=199) ) A <- 0.5/sqrt(5*2*sqrt(pi)) Z <- sweep(sqrt(coal.boot$t),2,sqrt(coal.boot$t0))/A Z.max <- sort(apply(Z,1,max))[190] Z.min <- sort(apply(Z,1,min))[10] top <- (sqrt(coal.boot$t0)-A*Z.min)^2 bot <- (sqrt(coal.boot$t0)-A*Z.max)^2 lines(year,top,lty=2); lines(year,bot,lty=2) Z <- apply(Z,2,sort) Z.05 <- Z[10,] Z.95 <- Z[190,] plot(year,Z.05,type="l",ylab="Z",ylim=c(-3,3)) lines(year,Z.95) coal.gen <- function( data, n ) { i <- sample(1:n,size=rpois(n=1,lambda=n),replace=T) data[i] } coal.boot2 <- boot( coal$date, coal.est, R=199, sim="parametric", ran.gen=coal.gen, mle=nrow(coal)) ========== Chapter 9 ========== Practical 9.1 (gravity data) ======================== y <- rnorm(10) junk.fun <- function(y, i ) var(y[i]) junk <- boot(y, junk.fun, R=9, keepf=T) junk$f apply(junk$t,2,sum) junk <- boot(y, junk.fun, R=9, sim="balanced",keepf=T) junk$f apply(junk$t,2,sum) junk <- boot(y, junk.fun, R=9, sim="balanced",keepf=T,strata=rep(1:2,c(5,5))) junk$f apply(junk$t,2,sum) grav.fun <- function( data, i ) { d <- data[i,] m <- tapply(d$g,d$series,mean) v <- tapply(d$g,d$series,var) n <- table(d$series) v <- (n-1)*v/n c( sum(m*n/v)/sum(n/v), sum(n/v) ) } grav.bal <- boot( gravity, grav.fun, R=49, strata=gravity$series, sim="balanced", keepf=T) mean(grav.bal$t[,1])-grav.bal$t0[1] grav.ord <- boot( gravity, grav.fun, R=49, strata=gravity$series, keepf=T) control(grav.ord,bias.adj=T) R <- 19; nreps <- 40; bias <- matrix(,nreps, 3) for (i in 1:nreps) { grav.ord <- boot( gravity, grav.fun, R=R, strata=gravity$series, keepf=T) grav.bal <- boot( gravity, grav.fun, R=R, strata=gravity$series, sim="balanced", keepf=T) bias[i,] <- c( mean(grav.ord$t[,1])-grav.ord$t0[1], mean(grav.bal$t[,1])-grav.bal$t0[1], control(grav.ord,bias.adj=T) ) } bias apply(bias,2,mean) apply(bias, 2, var) split.screen(c(1,2)) screen(1); qqplot(bias[,1],bias[,2], xlab="ordinary",ylab="balanced"); abline(0,1,lty=2) screen(2); qqplot(bias[,2],bias[,3], xlab="balanced",ylab="adjusted"); abline(0,1,lty=2) grav.ord <- boot(gravity, grav.fun, R=999, strata=gravity$series, keepf=T) grav.L <- empinf(grav.ord,type="reg") tL <- linear.approx(grav.ord, grav.L,index=1) plot(tL,grav.ord$t[,1]) cor(tL,grav.ord$t[,1]) grav.cont <- control(grav.ord,L=grav.L,index=1) grav.cont$bias grav.cont$var grav.cont$quantiles Practical 9.2 (tau data) ======================== tau.w <- function( data, w ) { d <- data$rate*w; d <- tapply(d,data$decay,sum)/tapply(w,data$decay,sum) d[1]-sum(d[-1]) } tau.L <- empinf(data=tau, statistic=tau.w, strata=tau$decay) exp.tilt(tau.L,theta=c(14,18),t0=16.16) tau.tilt <- tilt.boot( tau, tau.w, R=c(199,100,100), strata=tau$decay, stype="w", L=tau.L, alpha=c(0.05,0.95)) split.screen(c(1,2)) screen(1); plot(tau.tilt$t,imp.weights(tau.tilt),log="y") screen(2); plot(tau.tilt$t,imp.weights(tau.tilt,def=F),log="y") imp.quantile(tau.tilt,alpha=c(0.05,0.95)) imp.quantile(tau.tilt,alpha=c(0.05,0.95),def=F) tau.freq <- tilt.boot( tau, tau.w, R=c(499,250,250), strata=tau$decay, stype="w", tilt=F, alpha=c(0.05,0.95) ) imp.quantile(tau.freq,alpha=c(0.05,0.95)) tau.test <- NULL for (irep in 1:10) { tau.boot <- boot( tau, tau.w, R=199, stype="w", strata=tau$decay, keepf=T ) q.ord <- sort( tau.boot$t )[c(20,180)] tau.tilt <- tilt.boot( tau, tau.w, R=c(99,50,50), strata=tau$decay, stype="w", L=tau.L, alpha=c(0.1,0.9)) q.tilt <- imp.quantile( tau.tilt, alpha=c(0.1, 0.9) )$raw tau.bal <- tilt.boot( tau, tau.w, R=c(99,50,50), strata=tau$decay, stype="w", L=tau.L, alpha=c(0.1,0.9), sim="balanced" ) q.bal <- imp.quantile( tau.bal, alpha=c(0.1, 0.9) )$raw tau.test <- rbind( tau.test, c( q.ord, q.tilt, q.bal ) ) } sqrt( apply(tau.test, 2, var) ) Practical 9.3 (Claridge data) ======================== clar.fun <- function( data, f ) { r <- corr( data, f/sum(f) ) n <- nrow(data) d <- data[rep(1:n,f),] us <- (d[,1]-mean(d[,1]))/sqrt(var(d[,1])) xs <- (d[,2]-mean(d[,2]))/sqrt(var(d[,2])) L <- us*xs - r*(us^2+xs^2)/2 v <- sum((L/n)^2) clar.t <- boot( d, corr, R=25, stype="w")$t i <- is.na(clar.t) clar.t <- clar.t[!i] c(r, v, mean(clar.t)-r, var(clar.t), sum(i)) } clar.boot <- boot( claridge, clar.fun, R=999, stype="f", keepf=T) split.screen(c(1,2)) screen(1) plot(clar.boot$t[,1],clar.boot$t[,3],pch=".",xlab="theta*",ylab="bias") lines(lowess(clar.boot$t[,1],clar.boot$t[,3],f=1/2),lwd=2) screen(2) plot(clar.boot$t[,1],sqrt(clar.boot$t[,4]),pch=".",xlab="theta*",ylab="SD") l <- lowess(clar.boot$t[,1],clar.boot$t[,4],f=1/2) lines(l$x,sqrt(l$y),lwd=2) clar.rec <- boot( claridge, corr, R=999, stype="w", keepf=T) IS.ests <- function( theta, boot.out, statistic, A=0.2) { f <- smooth.f(theta,boot.out,width=A); theta.f <- statistic(boot.out$data,f/sum(f)); IS.w <- imp.weights(boot.out,q=f) moms <- imp.moments(boot.out,t=boot.out$t[,1]-theta.f,w=IS.w ) c( theta, theta.f, moms$raw, moms$rat, moms$reg ) } IS.clar <- matrix(,41,8); theta <- seq(0,0.8,length=41) for (j in 1:41) IS.clar[j,] <- IS.ests(theta[j],clar.rec,corr) screen(1,new=F); lines(IS.clar[,2],IS.clar[,7]) lines(IS.clar[,2],IS.clar[,5],lty=3) lines(IS.clar[,2],IS.clar[,3],lty=2) screen(2, new=F) lines(IS.clar[,2],sqrt(IS.clar[,8])) lines(IS.clar[,2],sqrt(IS.clar[,6]),lty=3) lines(IS.clar[,2],sqrt(IS.clar[,4]),lty=2) Practical 9.4 (capability data) ======================== psi <- function( tt, r, c=2.236*(5.79-5.49) ) c-r*tt psi1 <-function( tt, r, c=2.236*(5.79-5.49) ) r det.psi <- function(tt, r, xi) { p <- exp(xi * psi(tt, r)) length(r) * abs(sum(p * psi1(tt,r))/sum(p)) } r5 <- apply(matrix(capability$y,15,5,byrow=T), 1, function(x) diff(range(x)) ) m <- 300; top <- 10; bot <- 4 sad <- matrix( , m, 3 ) th <- seq(bot,top,length=m) for (i in 1:m ) { sp <- saddle( A=psi( th[i], r5 ), u=0 ) sad[i,] <- c(th[i], sp$spa[1]*det.psi( th[i], r5, xi=sp$zeta.hat ), sp$spa[2] ) } plot(sad[,1],sad[,2],type="l",xlab="theta hat",ylab="PDF") theta.fun <- function( d, w, c=2.236*(5.79-5.49)) c*sum(w)/sum(d*w) capab.v <- var.linear(empinf(data=r5, statistic=theta.fun)) capab.t0 <- c(2.236*(5.79-5.49)/mean(r5),sqrt(capab.v)) Afn <- function(t, data, c=2.236*(5.79-5.49) ) c-t*data ufn <- function(t, data, c=2.236*(5.79-5.49) ) 0 capab.sp <- saddle.distn(A=Afn, u=ufn, t0=capab.t0, data=r5) capab.sp$quantiles r5 <- NULL for (j in 1:71) r5 <- c( r5, diff(range(capability$y[j:(j+4)]))) Afn <- function(t, data, c=0.236*(5.79-5.49)) cbind(c-t*data,1) capab.sp1 <- saddle.distn(A=Afn,u=ufn,u2=15,wdist="p", type="cond",t0=capab.t0,data=r5) capab.sp1$quantiles Practical 9.5 (Darwin data) ======================== K <- function( xi ) sum( log(cosh( xi*darwin$y ) ) )-xi*sum(darwin$y) K2 <- function( xi ) sum( darwin$y^2/cosh(xi*darwin$y)^2 ) darwin.saddle <- saddle(K.adj=K,K2=K2) darwin.saddle 1-darwin.saddle$spa[2] ========== Chapter 10 ========== Practical 10.1 (gravity data) ======================== attach(gravity) grav.EL <- EL.profile(g,tmin=70,tmax=85,n.t=51 ) plot(grav.EL[,1],exp(grav.EL[,2]),type="l",xlab="mu", ylab="empirical likelihood") lik.CI(grav.EL,lim=-0.5*qchisq(0.95,1)) gravs.EL <- EL.profile(g[series==1],n.t=21) plot(gravs.EL[,1],exp(gravs.EL[,2]),type="n",xlab="mu", ylab="empirical likelihood",xlim=range(g)) lines(gravs.EL[,1],exp(gravs.EL[,2]),lty=2) for (s in 2:8) { gravs.EL <- EL.profile(g[series==s],n.t=21); lines(gravs.EL[,1],exp(gravs.EL[,2]),lty=2) } lims <- matrix(NA,8,2) for (s in 1:8) { x <- g[series==s]; lims[s,] <- range(x) } mu.min <- max(lims[,1]); mu.max <- min(lims[,2]) gravs.EL <- EL.profile(g[series==1], tmin=mu.min,tmax=mu.max,n.t=21) gravs.EL.L <- gravs.EL[,2] gravs.EL.mu <- gravs.EL[,1] for (s in 2:8) gravs.EL.L <- gravs.EL.L + EL.profile(g[series==s], tmin=mu.min,tmax=mu.max,n.t=21)[,2] gravs.EL.L <- gravs.EL.L - max(gravs.EL.L); lines(gravs.EL.mu,exp(gravs.EL.L),lwd=2) lik.CI(cbind(gravs.EL.mu,gravs.EL.L),lim=-0.5*qchisq(0.95,1)) Practical 10.2 (Islay data) ======================== attach( islay ) th <- ifelse(theta>180,theta-360,theta) a.t <- function( th ) c( sin(th*pi/180), cos(th*pi/180) ) b.t <- function( th ) c( cos(th*pi/180), -sin(th*pi/180) ) y <- t(apply(matrix(theta, 18,1), 1, a.t )) thetahat <- function( y ) { m <- apply(y,2,sum) m <- m/sqrt(m[1]^2+m[2]^2) 180*atan(m[1]/m[2])/pi } thetahat(y) u.t <- function( y, th ) crossprod( b.t(th), t(y) ) islay.EL <- EL.profile( y, tmin=-100, tmax=120, n.t=40, u=u.t ) plot(islay.EL[,1],islay.EL[,2],type="l",xlab="theta", ylab="log empirical likelihood",ylim=c(-25,0)); points(th,rep(-25,18)); abline(h=-3.84/2,lty=2) lik.CI(islay.EL,lim=-0.5*qchisq(0.95,1)) islay.EEF <- EEF.profile( y, tmin=-100, tmax=120, n.t=40, u=u.t ) lines( islay.EEF[,1],islay.EEF[,2],lty=3) lik.CI(islay.EEF,lim=-0.5*qchisq(0.95,1)) islay.fun <- function( y, i, angle ) { u <- as.vector(u.t( y[i,], angle) ) z <- rep(0,length(u)) EEF.fit <- glm(z~u-1,poisson) W.EEF <- 2*sum(1-fitted(EEF.fit)) EL.loglik <- function(lambda) - sum(log(1 + lambda * u)) EL.score <- function(lambda) - sum(u/(1 + lambda * u)) assign("u",u,frame=1) EL.out <- nlmin(EL.loglik,0.001) W.EL <- -2*EL.loglik(EL.out$x) c( thetahat( y[i,] ), W.EL, W.EEF, EL.out$converged ) } islay.boot <- boot( y, islay.fun, R=999, stype="i",angle=thetahat(y)) islay.boot$R <- sum(islay.boot$t[,4]) islay.boot$t <- islay.boot$t[islay.boot$t[,4]==1,] apply(islay.boot$t[,2:3],2,quantile,0.95) Practical 10.3 (Airconditioning data) ====================================== air1 <- data.frame(hours=aircondit$hours,G=1) air.bayes.gen <- function( d, a ) { out <- d out$G <- rgamma(nrow(d),shape=a+2) out } air.bayes.fun <- function( d ) sum( d$hours*d$G )/sum( d$G ) air.bayesian <- boot( air1, air.bayes.fun, R=999 , sim="parametric", ran.gen=air.bayes.gen,mle=-1) plot(density(air.bayesian$t,n=100,width=25),type="l", xlab="theta",ylab="density",ylim=c(0,0.02)) kappa <- 0; lambda <- 0; kappa.post <- kappa+length(air1$hours) lambda.post <- lambda + sum(air1$hours) theta <- 30:300 lines(theta,lambda.post/theta^2*dgamma(lambda.post/theta,kappa.post),lty=2)