@mike-lawrence
Last active September 9, 2016 21:29
Weird Stan behaviour
library(rstan)
#set random seed for reproducibility
set.seed(1)
#generate fake data
#variables setting data size
N = 30 #number of x-axis grid points
M = 2 #number of noisy replicates per grid point
#N=30, M=2 yields data that takes about 3s to sample @ 2e3 iterations
noise = 1 #amount of noise added to each replicate
#define the x-axis grid
x = seq(-10,10,length.out=N)
#define a wiggly function on x
f = scale(sin(x)*dnorm(x,5,8))[,1]
#generate a number of noisy replicates
#(each row is a replicate)
Y = matrix(
	f + rnorm(N*M,0,noise)
	, nrow = M
	, byrow = TRUE
)
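#optional sketch (not in the original gist): visualize the true function
#and the noisy replicates before fitting
matplot(x, t(Y), pch=1, col='grey50', xlab='x', ylab='y')
lines(x, f, lwd=2)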
#package for stan
to_stan = list(
	nX = N
	, X = x
	, nY = M*N
	, Y = as.vector(t(Y))
	, XY = rep(1:N,times=M)
	, fudge = 1e-3
)
#compile the stan models
tp0 = stan_model(file="tp0.stan")
tp1 = stan_model(file="tp1.stan")
tp2 = stan_model(file="tp2.stan")
#define a function to sample all models the same way
do_sampling = function(model,seed=999){
	sampling(
		model
		, data = to_stan
		, refresh = 0
		, iter = 2e3
		, chains = 1
		, cores = 1
		, seed = seed
	)
}
#do just a single sampling run as a quick test that they actually run
#sample tp0
tp0_post <- do_sampling(model=tp0)
alarm()
print(
	tp0_post
	, pars = c('noise','eta','rho')
	, digits = 2
	, probs = c(.025,.975)
)
get_elapsed_time(tp0_post)
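#optional check (not in the original gist): tp0 computes f in transformed
#parameters, so its draws are saved; compare the posterior mean of f to
#the true function
f_post = extract(tp0_post,'f')$f #iterations x nX matrix
plot(x, f, type='l', lwd=2, ylab='f')
lines(x, colMeans(f_post), lty=2)
legend('topleft', legend=c('true f','posterior mean'), lty=c(1,2))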
#sample tp1
tp1_post <- do_sampling(model=tp1)
alarm()
print(
	tp1_post
	, pars = c('noise','eta','rho')
	, digits = 2
	, probs = c(.025,.975)
)
get_elapsed_time(tp1_post)
#sample tp2
tp2_post <- do_sampling(model=tp2)
alarm()
print(
	tp2_post
	, pars = c('noise','eta','rho')
	, digits = 2
	, probs = c(.025,.975)
)
get_elapsed_time(tp2_post)
#sample all the models many times, saving to lists
out0 = list()
out1 = list()
out2 = list()
t0 = rep(NA,times=10)
t1 = rep(NA,times=10)
t2 = rep(NA,times=10)
for(i in 1:10){
	out0[[i]] = do_sampling(model=tp0,seed=i)
	out1[[i]] = do_sampling(model=tp1,seed=i)
	out2[[i]] = do_sampling(model=tp2,seed=i)
	t0[i] = sum(get_elapsed_time(out0[[i]]))
	t1[i] = sum(get_elapsed_time(out1[[i]]))
	t2[i] = sum(get_elapsed_time(out2[[i]]))
	print(paste('i:',i)) #for progress
	print( c(t0[i]-t1[i],t0[i]-t2[i]) , digits = 2 ) #compare times
	print( #compare samples
		c(
			all( extract(out0[[i]],'rho',permuted=F)==extract(out1[[i]],'rho',permuted=F) )
			, all( extract(out0[[i]],'rho',permuted=F)==extract(out2[[i]],'rho',permuted=F) )
		)
	)
}
#show timing stats
print( c(mean(t0),sd(t0)), digits=2)
print( c(mean(t1),sd(t1)), digits=2)
print( c(mean(t2),sd(t2)), digits=2)
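#optional sketch (not in the original gist): compare the run-time
#distributions of the three models side-by-side
boxplot(list(tp0=t0, tp1=t1, tp2=t2), ylab='total elapsed time (s)')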
// tp0.stan — computes f in the transformed parameters block
data {
	// nX: num unique X values
	int<lower=1> nX ;
	// X: unique x values
	vector[nX] X ;
	// nY: number of observations total
	int<lower=1> nY ;
	// Y: data
	vector[nY] Y ;
	// XY: *index* of x value per observation in Y
	int XY[nY] ;
	// fudge: small jitter added to the covariance diagonal for numerical stability
	real<lower=0> fudge ;
}
transformed data {
	// dx: pairwise *negative squared* distances between x values
	matrix[nX,nX] dx ;
	// off-diagonal
	for (i in 1:(nX-1)) {
		for (j in (i+1):nX) {
			dx[i,j] = -( X[i] - X[j] )^2 ;
			dx[j,i] = dx[i,j] ; // lower mirrors upper
		}
	}
	// diagonal
	for (nx in 1:nX){
		dx[nx,nx] = 0 ;
	}
}
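// note: since dx holds negative squared distances, the kernel used below,
// eta * exp(dx * rho), is a squared-exponential covariance with marginal
// variance eta and rho playing the role of an inverse squared length-scale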
parameters {
	// noise: sampling noise for replicates
	real<lower=0> noise ;
	// eta: GP parameter, scale of output (Y)
	real<lower=0> eta ;
	// rho: GP parameter, x-axis scale of influence between points (wiggliness)
	real<lower=0> rho ;
	// z: dummy variable for quicker multivariate normal computation for GP
	vector[nX] z ;
}
transformed parameters{
	// f: function values
	vector[nX] f ;
	{
		// L: Cholesky factor of the covariance matrix
		matrix[nX,nX] L ;
		// covMat: covariance matrix
		matrix[nX,nX] covMat ;
		// covMat off-diagonal
		for (i in 1:(nX-1)) {
			for (j in (i+1):nX) {
				covMat[i,j] = eta * exp(dx[i,j]*rho) ;
				covMat[j,i] = covMat[i,j] ; // lower mirrors upper
			}
		}
		// covMat diagonal: add jitter so the matrix stays numerically positive-definite
		for (nx in 1:nX){
			covMat[nx,nx] = eta + fudge ;
		}
		L = cholesky_decompose(covMat) ;
		// compute f as a draw from the GP (non-centered parameterization)
		f = L * z ;
	}
}
model {
	// priors on GP kernel parameters
	eta ~ weibull(2,1) ;
	rho ~ student_t(4,0,1) ;
	// prior on noise
	noise ~ weibull(2,1) ;
	z ~ normal(0,1) ;
	// assert sampling of each replicate of Y as GP plus noise
	Y ~ normal(f[XY],noise) ;
}
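// tp1.stan — identical to tp0.stan except that f is computed as a local
// variable in the model block rather than in transformed parameters, so
// the draws of f are not saved in the output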
data {
	// nX: num unique X values
	int<lower=1> nX ;
	// X: unique x values
	vector[nX] X ;
	// nY: number of observations total
	int<lower=1> nY ;
	// Y: data
	vector[nY] Y ;
	// XY: *index* of x value per observation in Y
	int XY[nY] ;
	// fudge: small jitter added to the covariance diagonal for numerical stability
	real<lower=0> fudge ;
}
transformed data {
	// dx: pairwise *negative squared* distances between x values
	matrix[nX,nX] dx ;
	// off-diagonal
	for (i in 1:(nX-1)) {
		for (j in (i+1):nX) {
			dx[i,j] = -( X[i] - X[j] )^2 ;
			dx[j,i] = dx[i,j] ; // lower mirrors upper
		}
	}
	// diagonal
	for (nx in 1:nX){
		dx[nx,nx] = 0 ;
	}
}
parameters {
	// noise: sampling noise for replicates
	real<lower=0> noise ;
	// eta: GP parameter, scale of output (Y)
	real<lower=0> eta ;
	// rho: GP parameter, x-axis scale of influence between points (wiggliness)
	real<lower=0> rho ;
	// z: dummy variable for quicker multivariate normal computation for GP
	vector[nX] z ;
}
model {
	// f: function values
	vector[nX] f ;
	{
		// L: Cholesky factor of the covariance matrix
		matrix[nX,nX] L ;
		// covMat: covariance matrix
		matrix[nX,nX] covMat ;
		// covMat off-diagonal
		for (i in 1:(nX-1)) {
			for (j in (i+1):nX) {
				covMat[i,j] = eta * exp(dx[i,j]*rho) ;
				covMat[j,i] = covMat[i,j] ; // lower mirrors upper
			}
		}
		// covMat diagonal: add jitter so the matrix stays numerically positive-definite
		for (nx in 1:nX){
			covMat[nx,nx] = eta + fudge ;
		}
		L = cholesky_decompose(covMat) ;
		// compute f as a draw from the GP (non-centered parameterization)
		f = L * z ;
	}
	// priors on GP kernel parameters
	eta ~ weibull(2,1) ;
	rho ~ student_t(4,0,1) ;
	// prior on noise
	noise ~ weibull(2,1) ;
	z ~ normal(0,1) ;
	// assert sampling of each replicate of Y as GP plus noise
	Y ~ normal(f[XY],noise) ;
}
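// tp2.stan — identical to tp1.stan except that the prior statements are
// placed before the covariance/Cholesky computation rather than after it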
data {
	// nX: num unique X values
	int<lower=1> nX ;
	// X: unique x values
	vector[nX] X ;
	// nY: number of observations total
	int<lower=1> nY ;
	// Y: data
	vector[nY] Y ;
	// XY: *index* of x value per observation in Y
	int XY[nY] ;
	// fudge: small jitter added to the covariance diagonal for numerical stability
	real<lower=0> fudge ;
}
transformed data {
	// dx: pairwise *negative squared* distances between x values
	matrix[nX,nX] dx ;
	// off-diagonal
	for (i in 1:(nX-1)) {
		for (j in (i+1):nX) {
			dx[i,j] = -( X[i] - X[j] )^2 ;
			dx[j,i] = dx[i,j] ; // lower mirrors upper
		}
	}
	// diagonal
	for (nx in 1:nX){
		dx[nx,nx] = 0 ;
	}
}
parameters {
	// noise: sampling noise for replicates
	real<lower=0> noise ;
	// eta: GP parameter, scale of output (Y)
	real<lower=0> eta ;
	// rho: GP parameter, x-axis scale of influence between points (wiggliness)
	real<lower=0> rho ;
	// z: dummy variable for quicker multivariate normal computation for GP
	vector[nX] z ;
}
model {
	// f: function values
	vector[nX] f ;
	// priors on GP kernel parameters
	eta ~ weibull(2,1) ;
	rho ~ student_t(4,0,1) ;
	// prior on noise
	noise ~ weibull(2,1) ;
	z ~ normal(0,1) ;
	{
		// L: Cholesky factor of the covariance matrix
		matrix[nX,nX] L ;
		// covMat: covariance matrix
		matrix[nX,nX] covMat ;
		// covMat off-diagonal
		for (i in 1:(nX-1)) {
			for (j in (i+1):nX) {
				covMat[i,j] = eta * exp(dx[i,j]*rho) ;
				covMat[j,i] = covMat[i,j] ; // lower mirrors upper
			}
		}
		// covMat diagonal: add jitter so the matrix stays numerically positive-definite
		for (nx in 1:nX){
			covMat[nx,nx] = eta + fudge ;
		}
		L = cholesky_decompose(covMat) ;
		// compute f as a draw from the GP (non-centered parameterization)
		f = L * z ;
	}
	// assert sampling of each replicate of Y as GP plus noise
	Y ~ normal(f[XY],noise) ;
}