eric-pedersen · September 25, 2020 16:21 · dill · Oct 12, 2020
diff --git a/gam.mh fixed examples.R b/gam.mh fixed examples.R
 library(mgcv)
 set.seed(3)
 n <- 400

 ## ----------------------------------------------------------------
 ## Example 1: Tweedie response simulated on top of gamSim() output
 ## ----------------------------------------------------------------

 ## gamSim() supplies covariates x0..x3 and the column f; the response
 ## is then redrawn as Tweedie with mean exp(f).
 dat <- gamSim(1, n = n, dist = "poisson", scale = 0.2)
 dat$y <- rTweedie(exp(dat$f), p = 1.3, phi = 0.5)
 b <- gam(
   y ~ s(x0) + s(x1) + s(x2) + s(x3),
   family = tw(), data = dat, method = "REML"
 )

 ## Option A: draw coefficients from the Gaussian approximation to the
 ## posterior (overwritten by option B below)...
 br <- rmvn(1000, coef(b), vcov(b))

 ## Option B: Metropolis-Hastings draws of the coefficients...
 br <- gam.mh(b, thin = 2, ns = 2000, rw.scale = 0.15)$bs
 ## With 'coda' available the effective sample size can be inspected:
 ## require(coda);effectiveSize(as.mcmc(br))

 ## Overlay quantile bands computed from the draws in br on the default
 ## plot of each smooth, evaluated on a common grid over [0, 1].
 x <- seq(0, 1, length.out = 100)
 pd <- data.frame(x0 = x, x1 = x, x2 = x, x3 = x)
 X <- predict(b, newdata = pd, type = "lpmatrix")
 par(mfrow = c(2, 2))
 for (i in seq_len(4)) {
   plot(b, select = i, scale = 0, scheme = 1)
   ## coefficient columns belonging to the i-th smooth
   sm <- b$smooth[[i]]
   ii <- sm$first.para:sm$last.para
   ## one column of smooth values per draw (the earlier version of this
   ## example did not multiply by br here)
   ff <- X[, ii] %*% t(br[, ii])
   fq <- apply(ff, 1, quantile, probs = c(0.025, 0.16, 0.84, 0.975))
   ## dashed: 2.5% and 97.5% quantiles; solid: 16% and 84% quantiles
   lines(x, fq[1, ], col = 2, lty = 2)
   lines(x, fq[4, ], col = 2, lty = 2)
   lines(x, fq[2, ], col = 2)
   lines(x, fq[3, ], col = 2)
 }

 ###############################################################
 ## Second example, where Gaussian approximation is a failure...
 ###############################################################

 ## Binary response: mostly zeros with a few scattered ones.
 y <- c(
   rep(0, 89), 1, 0, 1, 0, 0, 1, rep(0, 13), 1, 0, 0, 1,
   rep(0, 10), 1, 0, 0, 1, 1, 0, 1, rep(0, 4), 1, rep(0, 3),
   1, rep(0, 3), 1, rep(0, 10), 1, rep(0, 4), 1, 0, 1, 0, 0,
   rep(1, 4), 0, rep(1, 5), rep(0, 4), 1, 1, rep(0, 46)
 )
 ## Covariate: the 11 fixed points 0, 5, ..., 50 plus scaled and shifted
 ## normal draws for the remaining observations, sorted ascending.
 set.seed(3)
 x <- sort(c((0:10) * 5, rnorm(length(y) - 11) * 20 + 100))
 b <- gam(y ~ s(x, k = 15), method = "REML", family = binomial)
 br <- gam.mh(b, thin = 2, ns = 2000, rw.scale = 0.4)$bs
 X <- model.matrix(b)
 par(mfrow = c(1, 1))
 plot(x, y, col = rgb(0, 0, 0, 0.25), ylim = c(0, 1))
 ## linear predictor at the observed x, one column per MH draw
 ## (the earlier version of this example did not multiply by br here)
 ff <- X %*% t(br)
 linv <- b$family$linkinv
 ## Pointwise quantiles of the draws, mapped to the response scale...
 fq <- linv(apply(
   ff, 1, quantile,
   probs = c(0.025, 0.16, 0.5, 0.84, 0.975)
 ))
 ## red dashed: 2.5% / 97.5%; red solid: 16% / 84%; blue: median
 lines(x, fq[1, ], col = 2, lty = 2)
 lines(x, fq[5, ], col = 2, lty = 2)
 lines(x, fq[2, ], col = 2)
 lines(x, fq[4, ], col = 2)
 lines(x, fq[3, ], col = 4)
 ## Same curve from the Gaussian posterior approximation: fit +/- 2 se
 ## on the link scale, back-transformed.
 fv <- predict(b, se.fit = TRUE)
 lines(x, linv(fv$fit))
 lines(x, linv(fv$fit - 2 * fv$se.fit), lty = 3)
 lines(x, linv(fv$fit + 2 * fv$se.fit), lty = 3)
 ## ... The black dotted 95% CI from the Gaussian approximation is
 ## useless here!
	library(mgcv)
	set.seed(3)
	n <- 400

	## ==========================================
	## Example 1 -- simulated Tweedie model
	## ==========================================

	## Covariates and the column f come from gamSim(); y is replaced by
	## Tweedie draws whose mean is exp(f).
	dat <- gamSim(1, n = n, dist = "poisson", scale = 0.2)
	dat$y <- rTweedie(exp(dat$f), p = 1.3, phi = 0.5)
	b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3),
	  family = tw(), data = dat, method = "REML")

	## Coefficient draws from the Gaussian approximate posterior
	## (replaced by the MH draws on the next statement)...
	br <- rmvn(1000, coef(b), vcov(b))

	## ... or coefficient draws by Metropolis-Hastings sampling
	br <- gam.mh(b, thin = 2, ns = 2000, rw.scale = 0.15)$bs
	## Effective sample size check, if 'coda' is installed:
	## require(coda);effectiveSize(as.mcmc(br))

	## Draw each smooth with its default interval, then add quantile
	## bands computed from the draws in br.
	x <- seq(0, 1, length.out = 100)
	pd <- data.frame(x0 = x, x1 = x, x2 = x, x3 = x)
	X <- predict(b, newdata = pd, type = "lpmatrix")
	par(mfrow = c(2, 2))
	for (i in seq_len(4)) {
	  plot(b, select = i, scale = 0, scheme = 1)
	  ## columns of X / br that belong to smooth i
	  ii <- seq(b$smooth[[i]]$first.para, b$smooth[[i]]$last.para)
	  ## smooth evaluated on the grid, one column per draw (this product
	  ## with br was missing from the earlier version of the example)
	  ff <- X[, ii] %*% t(br[, ii])
	  fq <- apply(ff, 1, quantile, probs = c(0.025, 0.16, 0.84, 0.975))
	  ## outer band (2.5%, 97.5%) dashed, inner band (16%, 84%) solid
	  lines(x, fq[1, ], col = 2, lty = 2)
	  lines(x, fq[4, ], col = 2, lty = 2)
	  lines(x, fq[2, ], col = 2)
	  lines(x, fq[3, ], col = 2)
	}

	###############################################################
	## Second example, where Gaussian approximation is a failure...
	###############################################################

	## Binary response: mostly zeros with a few scattered ones.
	y <- c(rep(0, 89), 1, 0, 1, 0, 0, 1, rep(0, 13), 1, 0, 0, 1,
	       rep(0, 10), 1, 0, 0, 1, 1, 0, 1, rep(0, 4), 1, rep(0, 3),
	       1, rep(0, 3), 1, rep(0, 10), 1, rep(0, 4), 1, 0, 1, 0, 0,
	       rep(1, 4), 0, rep(1, 5), rep(0, 4), 1, 1, rep(0, 46))
	## Covariate: 11 fixed points 0, 5, ..., 50 plus length(y) - 11 scaled
	## and shifted normal draws, so that x and y have the same length.
	## (The '*' operators were missing from this line, which made it a
	## parse error and turned the fixed points into 0:105.)
	set.seed(3)
	x <- sort(c(0:10 * 5, rnorm(length(y) - 11) * 20 + 100))
	b <- gam(y ~ s(x, k = 15), method = "REML", family = binomial)
	## Metropolis-Hastings draws of the coefficients
	br <- gam.mh(b, thin = 2, ns = 2000, rw.scale = 0.4)$bs
	X <- model.matrix(b)
	par(mfrow = c(1, 1))
	plot(x, y, col = rgb(0, 0, 0, 0.25), ylim = c(0, 1))
	## linear predictor at the observed x, one column per draw
	## (not multiplied by br here previously)
	ff <- X %*% t(br)
	linv <- b$family$linkinv
	## Get intervals for the curve on the response scale...
	fq <- linv(apply(ff, 1, quantile, probs = c(0.025, 0.16, 0.5, 0.84, 0.975)))
	## red dashed: 2.5% / 97.5%; red solid: 16% / 84%; blue: median
	lines(x, fq[1, ], col = 2, lty = 2)
	lines(x, fq[5, ], col = 2, lty = 2)
	lines(x, fq[2, ], col = 2)
	lines(x, fq[4, ], col = 2)
	lines(x, fq[3, ], col = 4)
	## Compare to the Gaussian posterior approximation: fit +/- 2 se on
	## the link scale, back-transformed with the inverse link.
	fv <- predict(b, se.fit = TRUE)
	lines(x, linv(fv$fit))
	lines(x, linv(fv$fit - 2 * fv$se.fit), lty = 3)
	lines(x, linv(fv$fit + 2 * fv$se.fit), lty = 3)
	## ... Notice the useless 95% CI (black dotted) based on the
	## Gaussian approximation!