Last active
September 9, 2016 21:29
-
-
Save mike-lawrence/96d14bac6264b5f4eff4f3d7001acec9 to your computer and use it in GitHub Desktop.
Weird Stan behaviour
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rstan) | |
#set random seed for reproducibility | |
set.seed(1) | |
#generate fake data | |
#variables setting data size | |
N = 30 #number of x-axis grid points | |
M = 2 #number of noisy replicates per grid point | |
#N=30, M=2 yields data that takes about 3s to sample @ 2e3 iterations | |
noise = 1 #amount of noise added to each replicate | |
#define the x-axis grid | |
x = seq(-10,10,length.out=N) | |
#define a wiggly function on x | |
f = scale(sin(x)*dnorm(x,5,8))[,1] | |
#generate a number of noisy replicates | |
#(each row is a replicate) | |
Y = matrix( | |
f + rnorm(N*M,0,noise) | |
, nrow = M | |
, byrow = TRUE | |
) | |
#package for stan | |
to_stan = list( | |
nX = N | |
, X = x | |
, nY = M*N | |
, Y = as.vector(t(Y)) | |
, XY = rep(1:N,times=M) | |
, fudge = 1e-3 | |
) | |
#compile the stan models | |
tp0 = stan_model(file="tp0.stan") | |
tp1 = stan_model(file="tp1.stan") | |
tp2 = stan_model(file="tp2.stan") | |
#define a function to sample all models the same way | |
do_sampling = function(model,seed=999){ | |
sampling( | |
model | |
, data = to_stan | |
, refresh = 0 | |
, iter = 2e3 | |
, chains = 1 | |
, cores = 1 | |
, seed = seed | |
) | |
} | |
#do just a single sampling run as a quick test that they actually run | |
#sample tp0 | |
tp0_post <- do_sampling(model=tp0) | |
alarm() | |
print( | |
tp0_post | |
, pars = c('noise','eta','rho') | |
, digits = 2 | |
, probs = c(.025,.975) | |
) | |
get_elapsed_time(tp0_post) | |
#sample tp1 | |
tp1_post <- do_sampling(model=tp1) | |
alarm() | |
print( | |
tp1_post | |
, pars = c('noise','eta','rho') | |
, digits = 2 | |
, probs = c(.025,.975) | |
) | |
get_elapsed_time(tp1_post) | |
#sample tp2 | |
tp2_post <- do_sampling(model=tp2) | |
alarm() | |
print( | |
tp2_post | |
, pars = c('noise','eta','rho') | |
, digits = 2 | |
, probs = c(.025,.975) | |
) | |
get_elapsed_time(tp2_post) | |
#sample all the models many times, saving to lists | |
out0 = list() | |
out1 = list() | |
out2 = list() | |
t0 = rep(NA,times=10) | |
t1 = rep(NA,times=10) | |
t2 = rep(NA,times=10) | |
for(i in 1:10){ | |
out0[[i]] = do_sampling(model=tp0,seed=i) | |
out1[[i]] = do_sampling(model=tp1,seed=i) | |
out2[[i]] = do_sampling(model=tp2,seed=i) | |
t0[i] = sum(get_elapsed_time(out0[[i]])) | |
t1[i] = sum(get_elapsed_time(out1[[i]])) | |
t2[i] = sum(get_elapsed_time(out2[[i]])) | |
print(paste('i:',i)) #for progress | |
print( c(t0[i]-t1[i],t0[i]-t2[i]) , digits = 2 ) #compare times | |
print( #compare samples | |
c( | |
all( extract(out0[[i]],'rho',permuted=F)==extract(out1[[i]],'rho',permuted=F) ) | |
, all( extract(out0[[i]],'rho',permuted=F)==extract(out2[[i]],'rho',permuted=F) ) | |
) | |
) | |
} | |
#show timing stats | |
print( c(mean(t0),sd(t0)), digits=2) | |
print( c(mean(t1),sd(t1)), digits=2) | |
print( c(mean(t2),sd(t2)), digits=2) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
// nX: num unique X values | |
int<lower=1> nX ; | |
// X: unique x values | |
vector[nX] X ; | |
// nY: number of observations total | |
int<lower=1> nY ; | |
// Y: data | |
vector[nY] Y ; | |
// XY: *index* of x value per observation in Y | |
int XY[nY] ; | |
// fudge: a fudge factor for computation | |
real<lower=0> fudge ; | |
} | |
transformed data { | |
// dx: computing the pairwise distances between x values | |
matrix[nX,nX] dx ; | |
//off-diagonal | |
for (i in 1:(nX-1)) { | |
for (j in (i+1):nX) { | |
dx[i,j] = -( X[i] - X[j] )^2 ; | |
dx[j,i] = dx[i,j] ; // lower mirrors upper | |
} | |
} | |
//diagonal | |
for (nx in 1:nX){ | |
dx[nx,nx] = 0 ; | |
} | |
} | |
parameters { | |
// noise: sampling noise for replicates | |
real<lower=0> noise ; | |
// eta_intercept: GP parameter, scale of output (Y) | |
real<lower=0> eta ; | |
// rho_intercept: GP parameter, x-axis scale of influence between points (wiggliness) | |
real<lower=0> rho ; | |
// z: dummy variable for quicker multivariate normal computation for GP | |
vector[nX] z ; | |
} | |
transformed parameters{ | |
// f: function values | |
vector[nX] f ; | |
{ | |
// L: cholesky decomposition | |
matrix[nX,nX] L ; | |
// covMat: covariance matrix | |
matrix[nX,nX] covMat ; | |
// covMat off-diagonal | |
for (i in 1:(nX-1)) { | |
for (j in (i+1):nX) { | |
covMat[i,j] = eta * exp(dx[i,j]*rho) ; | |
covMat[j,i] = covMat[i,j] ; // lower mirrors upper | |
} | |
} | |
// covMat diagonal: adding jitter to avoid computation issues ?? | |
for (nx in 1:nX){ | |
covMat[nx,nx] = eta + fudge ; | |
} | |
L = cholesky_decompose(covMat) ; | |
// assert sampling of f as GP | |
f = L * z ; | |
} | |
} | |
model { | |
// priors on GP kernel parameters | |
eta ~ weibull(2,1) ; | |
rho ~ student_t(4,0,1) ; | |
// prior on noise | |
noise ~ weibull(2,1) ; | |
z ~ normal(0,1) ; | |
// assert sampling of each replicate of Y as GP plus noise | |
Y ~ normal(f[XY],noise) ; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
// nX: num unique X values | |
int<lower=1> nX ; | |
// X: unique x values | |
vector[nX] X ; | |
// nY: number of observations total | |
int<lower=1> nY ; | |
// Y: data | |
vector[nY] Y ; | |
// XY: *index* of x value per observation in Y | |
int XY[nY] ; | |
// fudge: a fudge factor for computation | |
real<lower=0> fudge ; | |
} | |
transformed data { | |
// dx: computing the pairwise distances between x values | |
matrix[nX,nX] dx ; | |
//off-diagonal | |
for (i in 1:(nX-1)) { | |
for (j in (i+1):nX) { | |
dx[i,j] = -( X[i] - X[j] )^2 ; | |
dx[j,i] = dx[i,j] ; // lower mirrors upper | |
} | |
} | |
//diagonal | |
for (nx in 1:nX){ | |
dx[nx,nx] = 0 ; | |
} | |
} | |
parameters { | |
// noise: sampling noise for replicates | |
real<lower=0> noise ; | |
// eta_intercept: GP parameter, scale of output (Y) | |
real<lower=0> eta ; | |
// rho_intercept: GP parameter, x-axis scale of influence between points (wiggliness) | |
real<lower=0> rho ; | |
// z: dummy variable for quicker multivariate normal computation for GP | |
vector[nX] z ; | |
} | |
model { | |
// f: function values | |
vector[nX] f ; | |
{ | |
// L: cholesky decomposition | |
matrix[nX,nX] L ; | |
// covMat: covariance matrix | |
matrix[nX,nX] covMat ; | |
// covMat off-diagonal | |
for (i in 1:(nX-1)) { | |
for (j in (i+1):nX) { | |
covMat[i,j] = eta * exp(dx[i,j]*rho) ; | |
covMat[j,i] = covMat[i,j] ; // lower mirrors upper | |
} | |
} | |
// covMat diagonal: adding jitter to avoid computation issues ?? | |
for (nx in 1:nX){ | |
covMat[nx,nx] = eta + fudge ; | |
} | |
L = cholesky_decompose(covMat) ; | |
// assert sampling of f as GP | |
f = L * z ; | |
} | |
// priors on GP kernel parameters | |
eta ~ weibull(2,1) ; | |
rho ~ student_t(4,0,1) ; | |
// prior on noise | |
noise ~ weibull(2,1) ; | |
z ~ normal(0,1) ; | |
// assert sampling of each replicate of Y as GP plus noise | |
Y ~ normal(f[XY],noise) ; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
// nX: num unique X values | |
int<lower=1> nX ; | |
// X: unique x values | |
vector[nX] X ; | |
// nY: number of observations total | |
int<lower=1> nY ; | |
// Y: data | |
vector[nY] Y ; | |
// XY: *index* of x value per observation in Y | |
int XY[nY] ; | |
// fudge: a fudge factor for computation | |
real<lower=0> fudge ; | |
} | |
transformed data { | |
// dx: computing the pairwise distances between x values | |
matrix[nX,nX] dx ; | |
//off-diagonal | |
for (i in 1:(nX-1)) { | |
for (j in (i+1):nX) { | |
dx[i,j] = -( X[i] - X[j] )^2 ; | |
dx[j,i] = dx[i,j] ; // lower mirrors upper | |
} | |
} | |
//diagonal | |
for (nx in 1:nX){ | |
dx[nx,nx] = 0 ; | |
} | |
} | |
parameters { | |
// noise: sampling noise for replicates | |
real<lower=0> noise ; | |
// eta_intercept: GP parameter, scale of output (Y) | |
real<lower=0> eta ; | |
// rho_intercept: GP parameter, x-axis scale of influence between points (wiggliness) | |
real<lower=0> rho ; | |
// z: dummy variable for quicker multivariate normal computation for GP | |
vector[nX] z ; | |
} | |
model { | |
// f: function values | |
vector[nX] f ; | |
// priors on GP kernel parameters | |
eta ~ weibull(2,1) ; | |
rho ~ student_t(4,0,1) ; | |
// prior on noise | |
noise ~ weibull(2,1) ; | |
z ~ normal(0,1) ; | |
{ | |
// L: cholesky decomposition | |
matrix[nX,nX] L ; | |
// covMat: covariance matrix | |
matrix[nX,nX] covMat ; | |
// covMat off-diagonal | |
for (i in 1:(nX-1)) { | |
for (j in (i+1):nX) { | |
covMat[i,j] = eta * exp(dx[i,j]*rho) ; | |
covMat[j,i] = covMat[i,j] ; // lower mirrors upper | |
} | |
} | |
// covMat diagonal: adding jitter to avoid computation issues ?? | |
for (nx in 1:nX){ | |
covMat[nx,nx] = eta + fudge ; | |
} | |
L = cholesky_decompose(covMat) ; | |
// assert sampling of f as GP | |
f = L * z ; | |
} | |
// assert sampling of each replicate of Y as GP plus noise | |
Y ~ normal(f[XY],noise) ; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment