Skip to content

Instantly share code, notes, and snippets.

@jeffeaton
Created February 27, 2019 12:10
Show Gist options
  • Save jeffeaton/f1c686b1d4c3cb50bac6d0cdca846c47 to your computer and use it in GitHub Desktop.
Save jeffeaton/f1c686b1d4c3cb50bac6d0cdca846c47 to your computer and use it in GitHub Desktop.
working example of shape constrained additive models from scam package in Stan
library(scam)
library(rstan)
library(ggplot2)
set.seed(0)
n <- 200
x1 <- runif(n)*6-3
f1 <- 3*exp(-x1^2) # unconstrained term
f1 <- (f1-min(f1))/(max(f1)-min(f1)) # function scaled to have range [0,1]
x2 <- runif(n)*4-1;
f2 <- exp(4*x2)/(1+exp(4*x2)) # monotone increasing smooth
f2 <- (f2-min(f2))/(max(f2)-min(f2)) # function scaled to have range [0,1]
f <- f1+f2
y <- f+rnorm(n)*0.1
dat <- data.frame(x1=x1,x2=x2,y=y)
b <- scam(y ~ s(x1, k=15, bs="cr", m=2) + s(x2, k=25, bs="mpi", m=2),
family = gaussian(link="identity"), data=dat, not.exp=FALSE)
plot(b, pages=1)
#' fit in stan
sm1 <- smoothCon(s(x1, k= 15, bs = "cr"), data = dat, absorb.cons = TRUE, scale.penalty = TRUE)[[1]]
sm2 <- smoothCon(s(x2, k= 25, bs = "mpi"), data = dat, absorb.cons = TRUE, scale.penalty = TRUE)[[1]] # absorb.cons = TRUE does nothing for constrained smooth
code <- "
data {
int<lower=1> N; // number of observations
vector[N] Y; // response variable
// data of smooth s(x1)
int nk_s1; // number of knots
int rank_s1; // rank of smoother
matrix[N, nk_s1] X_s1; // model matrix
matrix[nk_s1, nk_s1] S_s1; // penalty matrix
// data of smooth s(x2)
int nk_s2; // number of knots
int rank_s2; // rank of smoother
matrix[N, nk_s2] X_s2; // model matrix
matrix[nk_s2, nk_s2] S_s2; // penalty matrix
}
parameters {
real beta0; // intercept
// parameters for smooth s(x1)
vector[nk_s1] beta_s1;
real<lower=0> sd_s1;
// parameters for smooth s(x2)
vector[nk_s2] beta_s2;
real<lower=0> sd_s2;
// residual standard deviation
real<lower=0> sigma; // residual SD
}
transformed parameters {
vector[nk_s2] beta_tilde_s2 = exp(beta_s2);
}
model {
vector[N] mu = beta0 + X_s1 * beta_s1 + X_s2 * beta_tilde_s2;
// priors (taken from brms defaults)
target += student_t_lpdf(beta0 | 3, 0, 10);
// penalty for s(x1)
target += -rank_s1 * log(sd_s1) - 1 / (2 * sd_s1 * sd_s1) * quad_form(S_s1, beta_s1);
target += student_t_lpdf(sd_s1 | 3, 0, 10);
// penalty for s(x2)
target += -rank_s2 * log(sd_s2) - 1 / (2 * sd_s2 * sd_s2) * quad_form(S_s2, beta_s2);
target += student_t_lpdf(sd_s2 | 3, 0, 10);
target += student_t_lpdf(sigma | 3, 0, 10);
// likelihood
target += normal_lpdf(Y | mu, sigma);
}
"
sdata <- list(N = nrow(dat),
Y = dat$y,
##
## sm1
nk_s1 = ncol(sm1$X),
rank_s1 = sum(sm1$rank),
X_s1 = sm1$X,
S_s1 = Reduce("+", sm1$S),
##
## sm1
nk_s2 = ncol(sm2$X),
rank_s2 = sum(sm2$rank),
X_s2 = sm2$X,
S_s2 = Reduce("+", sm2$S))
fit <- stan(model_code = code, data = sdata, control = list(adapt_delta = 0.95))
x1_pred <- data.frame(x1 = seq(-3, 3, 0.1))
X1pred <- PredictMat(sm1, x1_pred)
s1pred <- X1pred %*% t(extract(fit)$beta_s1)
x1_pred$mean = rowMeans(s1pred)
x1_pred$median = apply(s1pred, 1, median)
x1_pred$lower = apply(s1pred, 1, quantile, 0.015)
x1_pred$upper = apply(s1pred, 1, quantile, 0.975)
ggplot(x1_pred, aes(x1)) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3, fill = 1, color = NA) +
geom_line(aes(y = mean), color = 1, size = 1) +
geom_line(aes(y = median), color = 4, size = 1)
x2_pred <- data.frame(x2 = seq(-1, 3, 0.1))
X2pred <- PredictMat(sm2, x2_pred)[ ,-1]
s2pred <- X2pred %*% t(extract(fit)$beta_tilde_s2)
x2_pred$mean = rowMeans(s2pred)
x2_pred$median = apply(s2pred, 1, median)
x2_pred$lower = apply(s2pred, 1, quantile, 0.025)
x2_pred$upper = apply(s2pred, 1, quantile, 0.975)
ggplot(x2_pred, aes(x2)) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3, fill = 1, color = NA) +
geom_line(aes(y = mean), color = 2, size = 1) +
geom_line(aes(y = median), color = 4, size = 1)
@jeffeaton
Copy link
Author

Some notes:

  • Based on first example in ?scam
  • Probably not optimal parameterisation. The standard diagonalised parameterisation used for smooth terms by brms and rstanarm won't work with out of the box due to the $\beta$ -> $\beta_tilde$ transformation for shape constrained terms.
  • This example with brms doesn't work because it doesn't account for the transformation $\beta$ -> $\beta_tilde$: https://groups.google.com/forum/#!topic/brms-users/hYF6jVEvOvU
  • Think something might be wrong with how the intercept / constraint is handled in the s(x2) plot. Need to review source code for scam. Answer might be in the updated script in the google list post.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment