@mike-lawrence
Last active September 9, 2016 21:29
Weird Stan behaviour
library(rstan)
#set random seed for reproducibility
set.seed(1)
#generate fake data
#variables setting data size
N = 30 #number of x-axis grid points
M = 2 #number of noisy replicates per grid point
#N=30, M=2 yields data that takes about 3s to sample @ 2e3 iterations
noise = 1 #amount of noise added to each replicate
#define the x-axis grid
x = seq(-10,10,length.out=N)
#define a wiggly function on x
f = scale(sin(x)*dnorm(x,5,8))[,1]
#generate a number of noisy replicates
#(each row is a replicate)
Y = matrix(
	f + rnorm(N*M,0,noise)
	, nrow = M
	, byrow = TRUE
)
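#optional sketch (not in the original gist): visualize the true function
#and the noisy replicates before fitting
matplot(x, t(Y), pch=1, col='grey50', xlab='x', ylab='y')
lines(x, f, lwd=2)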
#package for stan
to_stan = list(
	nX = N
	, X = x
	, nY = M*N
	, Y = as.vector(t(Y))
	, XY = rep(1:N,times=M)
	, fudge = 1e-3
)
#compile the stan models
tp0 = stan_model(file="tp0.stan")
tp1 = stan_model(file="tp1.stan")
tp2 = stan_model(file="tp2.stan")
#define a function to sample all models the same way
do_sampling = function(model,seed=999){
	sampling(
		model
		, data = to_stan
		, refresh = 0
		, iter = 2e3
		, chains = 1
		, cores = 1
		, seed = seed
	)
}
#do just a single sampling run as a quick test that they actually run
#sample tp0
tp0_post <- do_sampling(model=tp0)
alarm()
print(
	tp0_post
	, pars = c('noise','eta','rho')
	, digits = 2
	, probs = c(.025,.975)
)
get_elapsed_time(tp0_post)
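#optional check (not in the original gist): tp0 computes f in transformed
#parameters, so its draws are saved; compare the posterior mean of f to
#the true function
f_post = extract(tp0_post,'f')$f #iterations x nX matrix
plot(x, f, type='l', lwd=2, ylab='f')
lines(x, colMeans(f_post), lty=2)
legend('topleft', legend=c('true f','posterior mean'), lty=c(1,2))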
#sample tp1
tp1_post <- do_sampling(model=tp1)
alarm()
print(
	tp1_post
	, pars = c('noise','eta','rho')
	, digits = 2
	, probs = c(.025,.975)
)
get_elapsed_time(tp1_post)
#sample tp2
tp2_post <- do_sampling(model=tp2)
alarm()
print(
	tp2_post
	, pars = c('noise','eta','rho')
	, digits = 2
	, probs = c(.025,.975)
)
get_elapsed_time(tp2_post)
#sample all the models many times, saving to lists
out0 = list()
out1 = list()
out2 = list()
t0 = rep(NA,times=10)
t1 = rep(NA,times=10)
t2 = rep(NA,times=10)
for(i in 1:10){
	out0[[i]] = do_sampling(model=tp0,seed=i)
	out1[[i]] = do_sampling(model=tp1,seed=i)
	out2[[i]] = do_sampling(model=tp2,seed=i)
	t0[i] = sum(get_elapsed_time(out0[[i]]))
	t1[i] = sum(get_elapsed_time(out1[[i]]))
	t2[i] = sum(get_elapsed_time(out2[[i]]))
	print(paste('i:',i)) #for progress
	print( c(t0[i]-t1[i],t0[i]-t2[i]) , digits = 2 ) #compare times
	print( #compare samples
		c(
			all( extract(out0[[i]],'rho',permuted=F)==extract(out1[[i]],'rho',permuted=F) )
			, all( extract(out0[[i]],'rho',permuted=F)==extract(out2[[i]],'rho',permuted=F) )
		)
	)
}
#show timing stats
print( c(mean(t0),sd(t0)), digits=2)
print( c(mean(t1),sd(t1)), digits=2)
print( c(mean(t2),sd(t2)), digits=2)
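#optional sketch (not in the original gist): compare the run-time
#distributions of the three models side-by-side
boxplot(list(tp0=t0, tp1=t1, tp2=t2), ylab='total elapsed time (s)')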
// tp0.stan — computes f in the transformed parameters block
data {
	// nX: num unique X values
	int<lower=1> nX ;
	// X: unique x values
	vector[nX] X ;
	// nY: number of observations total
	int<lower=1> nY ;
	// Y: data
	vector[nY] Y ;
	// XY: *index* of x value per observation in Y
	int XY[nY] ;
	// fudge: small jitter added to the covariance diagonal for numerical stability
	real<lower=0> fudge ;
}
transformed data {
	// dx: pairwise *negative squared* distances between x values
	matrix[nX,nX] dx ;
	// off-diagonal
	for (i in 1:(nX-1)) {
		for (j in (i+1):nX) {
			dx[i,j] = -( X[i] - X[j] )^2 ;
			dx[j,i] = dx[i,j] ; // lower mirrors upper
		}
	}
	// diagonal
	for (nx in 1:nX){
		dx[nx,nx] = 0 ;
	}
}
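// note: since dx holds negative squared distances, the kernel used below,
// eta * exp(dx * rho), is a squared-exponential covariance with marginal
// variance eta and rho playing the role of an inverse squared length-scale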
parameters {
	// noise: sampling noise for replicates
	real<lower=0> noise ;
	// eta: GP parameter, scale of output (Y)
	real<lower=0> eta ;
	// rho: GP parameter, x-axis scale of influence between points (wiggliness)
	real<lower=0> rho ;
	// z: dummy variable for quicker multivariate normal computation for GP
	vector[nX] z ;
}
transformed parameters{
	// f: function values
	vector[nX] f ;
	{
		// L: Cholesky factor of the covariance matrix
		matrix[nX,nX] L ;
		// covMat: covariance matrix
		matrix[nX,nX] covMat ;
		// covMat off-diagonal
		for (i in 1:(nX-1)) {
			for (j in (i+1):nX) {
				covMat[i,j] = eta * exp(dx[i,j]*rho) ;
				covMat[j,i] = covMat[i,j] ; // lower mirrors upper
			}
		}
		// covMat diagonal: add jitter so the matrix stays numerically positive-definite
		for (nx in 1:nX){
			covMat[nx,nx] = eta + fudge ;
		}
		L = cholesky_decompose(covMat) ;
		// compute f as a draw from the GP (non-centered parameterization)
		f = L * z ;
	}
}
model {
	// priors on GP kernel parameters
	eta ~ weibull(2,1) ;
	rho ~ student_t(4,0,1) ;
	// prior on noise
	noise ~ weibull(2,1) ;
	z ~ normal(0,1) ;
	// assert sampling of each replicate of Y as GP plus noise
	Y ~ normal(f[XY],noise) ;
}
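// tp1.stan — identical to tp0.stan except that f is computed as a local
// variable in the model block rather than in transformed parameters, so
// the draws of f are not saved in the output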
data {
	// nX: num unique X values
	int<lower=1> nX ;
	// X: unique x values
	vector[nX] X ;
	// nY: number of observations total
	int<lower=1> nY ;
	// Y: data
	vector[nY] Y ;
	// XY: *index* of x value per observation in Y
	int XY[nY] ;
	// fudge: small jitter added to the covariance diagonal for numerical stability
	real<lower=0> fudge ;
}
transformed data {
	// dx: pairwise *negative squared* distances between x values
	matrix[nX,nX] dx ;
	// off-diagonal
	for (i in 1:(nX-1)) {
		for (j in (i+1):nX) {
			dx[i,j] = -( X[i] - X[j] )^2 ;
			dx[j,i] = dx[i,j] ; // lower mirrors upper
		}
	}
	// diagonal
	for (nx in 1:nX){
		dx[nx,nx] = 0 ;
	}
}
parameters {
	// noise: sampling noise for replicates
	real<lower=0> noise ;
	// eta: GP parameter, scale of output (Y)
	real<lower=0> eta ;
	// rho: GP parameter, x-axis scale of influence between points (wiggliness)
	real<lower=0> rho ;
	// z: dummy variable for quicker multivariate normal computation for GP
	vector[nX] z ;
}
model {
	// f: function values
	vector[nX] f ;
	{
		// L: Cholesky factor of the covariance matrix
		matrix[nX,nX] L ;
		// covMat: covariance matrix
		matrix[nX,nX] covMat ;
		// covMat off-diagonal
		for (i in 1:(nX-1)) {
			for (j in (i+1):nX) {
				covMat[i,j] = eta * exp(dx[i,j]*rho) ;
				covMat[j,i] = covMat[i,j] ; // lower mirrors upper
			}
		}
		// covMat diagonal: add jitter so the matrix stays numerically positive-definite
		for (nx in 1:nX){
			covMat[nx,nx] = eta + fudge ;
		}
		L = cholesky_decompose(covMat) ;
		// compute f as a draw from the GP (non-centered parameterization)
		f = L * z ;
	}
	// priors on GP kernel parameters
	eta ~ weibull(2,1) ;
	rho ~ student_t(4,0,1) ;
	// prior on noise
	noise ~ weibull(2,1) ;
	z ~ normal(0,1) ;
	// assert sampling of each replicate of Y as GP plus noise
	Y ~ normal(f[XY],noise) ;
}
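// tp2.stan — identical to tp1.stan except that the prior statements are
// placed before the covariance/Cholesky computation rather than after it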
data {
	// nX: num unique X values
	int<lower=1> nX ;
	// X: unique x values
	vector[nX] X ;
	// nY: number of observations total
	int<lower=1> nY ;
	// Y: data
	vector[nY] Y ;
	// XY: *index* of x value per observation in Y
	int XY[nY] ;
	// fudge: small jitter added to the covariance diagonal for numerical stability
	real<lower=0> fudge ;
}
transformed data {
	// dx: pairwise *negative squared* distances between x values
	matrix[nX,nX] dx ;
	// off-diagonal
	for (i in 1:(nX-1)) {
		for (j in (i+1):nX) {
			dx[i,j] = -( X[i] - X[j] )^2 ;
			dx[j,i] = dx[i,j] ; // lower mirrors upper
		}
	}
	// diagonal
	for (nx in 1:nX){
		dx[nx,nx] = 0 ;
	}
}
parameters {
	// noise: sampling noise for replicates
	real<lower=0> noise ;
	// eta: GP parameter, scale of output (Y)
	real<lower=0> eta ;
	// rho: GP parameter, x-axis scale of influence between points (wiggliness)
	real<lower=0> rho ;
	// z: dummy variable for quicker multivariate normal computation for GP
	vector[nX] z ;
}
model {
	// f: function values
	vector[nX] f ;
	// priors on GP kernel parameters
	eta ~ weibull(2,1) ;
	rho ~ student_t(4,0,1) ;
	// prior on noise
	noise ~ weibull(2,1) ;
	z ~ normal(0,1) ;
	{
		// L: Cholesky factor of the covariance matrix
		matrix[nX,nX] L ;
		// covMat: covariance matrix
		matrix[nX,nX] covMat ;
		// covMat off-diagonal
		for (i in 1:(nX-1)) {
			for (j in (i+1):nX) {
				covMat[i,j] = eta * exp(dx[i,j]*rho) ;
				covMat[j,i] = covMat[i,j] ; // lower mirrors upper
			}
		}
		// covMat diagonal: add jitter so the matrix stays numerically positive-definite
		for (nx in 1:nX){
			covMat[nx,nx] = eta + fudge ;
		}
		L = cholesky_decompose(covMat) ;
		// compute f as a draw from the GP (non-centered parameterization)
		f = L * z ;
	}
	// assert sampling of each replicate of Y as GP plus noise
	Y ~ normal(f[XY],noise) ;
}