Skip to content

Instantly share code, notes, and snippets.

@MatsuuraKentaro
Last active June 26, 2017 03:35
Show Gist options
  • Save MatsuuraKentaro/228bc17d945413f26ed5cef39b4f6ab5 to your computer and use it in GitHub Desktop.
Save MatsuuraKentaro/228bc17d945413f26ed5cef39b4f6ab5 to your computer and use it in GitHub Desktop.
Comparison of LOOCV and WAIC
data {
int D;
int N;
matrix[N,D] X;
vector[N] Y;
}
parameters {
vector[D] b;
real<lower=0> sigma;
}
transformed parameters {
vector[N] mu;
mu = X*b;
}
model {
Y ~ normal(mu, sigma);
}
generated quantities {
vector[N] log_lik;
vector[N] y_pred;
for (n in 1:N) {
log_lik[n] = normal_lpdf(Y[n] | mu[n], sigma);
y_pred[n] = normal_rng(mu[n], sigma);
}
}
data {
int D;
int N;
matrix[N,D] X;
int<lower=0, upper=1> Y[N];
}
parameters {
vector[D] b;
}
transformed parameters {
vector[N] mu;
mu = X*b;
}
model {
b ~ student_t(4, 0, 1);
Y ~ bernoulli_logit(mu);
}
generated quantities {
vector[N] log_lik;
int y_pred[N];
for (n in 1:N) {
log_lik[n] = bernoulli_logit_lpmf(Y[n] | mu[n]);
y_pred[n] = bernoulli_rng(inv_logit(mu[n]));
}
}
data {
int G;
int N;
vector[N] Y;
int N2G[N];
}
parameters {
real mu0;
real<lower=0> s0;
vector[G] mu;
real<lower=0> sigma;
}
model {
mu0 ~ student_t(4, 0, 10);
s0 ~ student_t(4, 0, 10);
mu ~ normal(mu0, s0);
Y ~ normal(mu[N2G], sigma);
}
generated quantities {
vector[N] log_lik;
vector[G] y_pred;
for (n in 1:N)
log_lik[n] = normal_lpdf(Y[n] | mu[N2G[n]], sigma);
for (g in 1:G)
y_pred[g] = normal_rng(mu[g], sigma);
}
functions {
real f_fix(real[] theta, real x) {
return(normal_lpdf(x | theta[1], theta[2]));
}
real f(real Y, real[] theta, real x) {
return(normal_lpdf(Y | x, theta[3]));
}
vector log_f_fix(real[] theta, real a, real b, int M) {
vector[M+1] lp;
real h ;
h = (b-a)/M;
lp[1] = f_fix(theta, a);
for (m in 1:(M/2))
lp[2*m] = log(4) + f_fix(theta, a + h*(2*m-1));
for (m in 1:(M/2-1))
lp[2*m+1] = log(2) + f_fix(theta, a + h*2*m);
lp[M+1] = f_fix(theta, b);
return(lp);
}
vector log_f(real Y, real[] theta, real a, real b, int M) {
vector[M+1] lp;
real h;
h = (b-a)/M;
lp[1] = f(Y, theta, a);
for (m in 1:(M/2))
lp[2*m] = f(Y, theta, a + h*(2*m-1));
for (m in 1:(M/2-1))
lp[2*m+1] = f(Y, theta, a + h*2*m);
lp[M+1] = f(Y, theta, b);
return(lp);
}
}
data {
int G;
int N;
vector[N] Y;
int N2G[N];
}
parameters {
real mu0;
real<lower=0> s0;
vector[G] mu;
real<lower=0> sigma;
}
transformed parameters {
real theta[3];
theta[1] = mu0;
theta[2] = s0;
theta[3] = sigma;
}
model {
mu0 ~ student_t(4, 0, 10);
s0 ~ student_t(4, 0, 10);
mu ~ normal(mu0, s0);
Y ~ normal(mu[N2G], sigma);
}
generated quantities {
vector[N] log_lik;
real mu_pred;
real y_pred;
int M;
M = 100;
{
vector[M+1] fix;
vector[M+1] rest;
fix = log_f_fix(theta, mu0-5*s0, mu0+5*s0, M);
for (n in 1:N) {
rest = log_f(Y[n], theta, mu0-5*s0, mu0+5*s0, M);
log_lik[n] = log(10*s0/M/3) + log_sum_exp(fix + rest);
}
}
mu_pred = normal_rng(mu0, s0);
y_pred = normal_rng(mu_pred, sigma);
}
library(rstan)
stanmodel <- stan_model(file='model/model1.stan')
N_sim <- 1000
D <- 3
b <- c(1.3, -3.1, 0.7)
SD <- 2.5
set.seed(123)
N <- 100
X <- cbind(1, matrix(runif(N*(D-1), -3, 3), N, (D-1)))
Mu <- X %*% b
result <- t(sapply(1:N_sim, function(seed) {
set.seed(seed)
Y <- rnorm(N, Mu, SD)
data <- list(N=N, D=D, X=X, Y=Y)
fit <- sampling(stanmodel, data=data, pars=c('log_lik', 'y_pred'), iter=11000, warmup=1000, seed=123, refresh=-1)
ms <- rstan::extract(fit)
error_by_sample <- t(sapply(1:N, function(n) {
dens <- KernSmooth::bkde(ms$y_pred[,n])
f_pred <- approxfun(dens$x, dens$y, yleft=1e-12, yright=1e-12)
f_true <- function(x) dnorm(x=x, mean=Mu[n], sd=SD)
f_en <- function(x) f_true(x)*(-log(f_true(x)))
f_ge <- function(x) f_true(x)*(-log(ifelse(f_pred(x) <= 0, 1e-12, f_pred(x))))
en <- pracma::romberg(f_en, a=Mu[n]-6*SD, b=Mu[n]+6*SD)$value
ge <- pracma::romberg(f_ge, a=Mu[n]-6*SD, b=Mu[n]+6*SD)$value
c(ge=ge, en=en)
}))
en <- mean(error_by_sample[,'en'])
ge <- mean(error_by_sample[,'ge'])
looic <- loo::loo(ms$log_lik)$looic/(2*N)
waic <- loo::waic(ms$log_lik)$waic/(2*N)
c(ge=ge, looic=looic, waic=waic, en=en)
}))
library(rstan)
stanmodel <- stan_model(file='model/model3.stan')
N_sim <- 1000
D <- 3
b <- c(0.8, -1.1, 0.1)
set.seed(123)
N <- 100
X <- cbind(1, matrix(runif(N*(D-1), -3, 3), N, (D-1)))
logistic <- function(x) 1/(1+exp(-x))
Mu <- logistic(X %*% b)
result <- t(sapply(1:N_sim, function(seed) {
set.seed(seed)
Y <- rbinom(N, size=1, prob=Mu)
data <- list(N=N, D=D, X=X, Y=Y)
fit <- sampling(stanmodel, pars=c('log_lik', 'y_pred'), data=data, iter=11000, warmup=1000, seed=123, refresh=-1)
ms <- rstan::extract(fit)
N_mcmc <- length(ms$lp__)
error_by_sample <- t(sapply(1:N, function(n) {
post <- table(factor(ms$y_pred[,n], levels=0:1))/N_mcmc
f_pred <- function(x) dbinom(x=x, size=1, prob=post['1'])
f_true <- function(x) dbinom(x=x, size=1, prob=Mu[n])
f_en <- function(x) f_true(x)*(-log(f_true(x)))
f_ge <- function(x) f_true(x)*(-log(f_pred(x)))
en <- sum(sapply(0:1, function(x) f_en(x)))
ge <- sum(sapply(0:1, function(x) f_ge(x)))
c(ge=ge, en=en)
}))
en <- mean(error_by_sample[,'en'])
ge <- mean(error_by_sample[,'ge'])
looic <- loo::loo(ms$log_lik)$looic/(2*N)
waic <- loo::waic(ms$log_lik)$waic/(2*N)
c(ge=ge, looic=looic, waic=waic, en=en)
}))
library(rstan)
stanmodel <- stan_model(file='model/model5a.stan')
N_sim <- 1000
mu0 <- 0
SD0 <- 10.0
SD <- 2.0
G <- 10
N <- 130
NbyG <- rep(N/G, G)
N2G <- data.frame(N=1:N, G=rep(1:G, times=NbyG))
G2N_list <- split(N2G$N, N2G$G)
result <- t(sapply(1:N_sim, function(seed) {
set.seed(seed)
Mu <- rnorm(G, mean=mu0, sd=SD0)
Y <- rnorm(N, mean=Mu[N2G$G], sd=SD)
data <- list(N=N, G=G, NbyG=NbyG, N2G=N2G$G, Y=Y)
fit <- sampling(stanmodel, pars=c('log_lik', 'y_pred'), data=data, iter=11000, warmup=1000, seed=123, refresh=-1)
ms <- rstan::extract(fit)
error_by_group <- t(sapply(1:G, function(g) {
dens <- KernSmooth::bkde(ms$y_pred[,g])
f_pred <- approxfun(dens$x, dens$y, yleft=1e-12, yright=1e-12)
f_true <- function(x) dnorm(x=x, mean=Mu[g], sd=SD)
f_en <- function(x) f_true(x)*(-log(f_true(x)))
f_ge <- function(x) f_true(x)*(-log(ifelse(f_pred(x) <= 0, 1e-12, f_pred(x))))
en <- pracma::romberg(f_en, a=Mu[g]-6*SD, b=Mu[g]+6*SD)$value
ge <- pracma::romberg(f_ge, a=Mu[g]-6*SD, b=Mu[g]+6*SD)$value
c(ge=ge, en=en)
}))
en <- sum(error_by_group[,'en'])
ge <- sum(error_by_group[,'ge'])
looic <- sum(sapply(1:G, function(g) { ns <- G2N_list[[g]]; loo::loo(ms$log_lik[,ns])$looic/(2*length(ns)) }))
waic <- sum(sapply(1:G, function(g) { ns <- G2N_list[[g]]; loo::waic(ms$log_lik[,ns])$waic/(2*length(ns)) }))
c(ge=ge, looic=looic, waic=waic, en=en)
}))
library(rstan)
stanmodel <- stan_model(file='model/model5b.stan')
N_sim <- 1000
mu0 <- 0
SD0 <- 10.0
SD <- 2.0
G <- 10
N <- 130
NbyG <- rep(N/G, G)
N2G <- data.frame(N=1:N, G=rep(1:G, times=NbyG))
G2N_list <- split(N2G$N, N2G$G)
result <- t(sapply(1:N_sim, function(seed) {
set.seed(seed)
Mu <- rnorm(G, mean=mu0, sd=SD0)
Y <- rnorm(N, mean=Mu[N2G$G], sd=SD)
data <- list(N=N, G=G, NbyG=NbyG, N2G=N2G$G, Y=Y)
fit <- sampling(stanmodel, pars=c('log_lik', 'y_pred'), data=data, iter=11000, warmup=1000, seed=123, refresh=-1)
ms <- rstan::extract(fit)
dens <- KernSmooth::bkde(ms$y_pred)
f_pred <- approxfun(dens$x, dens$y, yleft=1e-12, yright=1e-12)
f <- function(mu, x) dnorm(x=x, mean=mu, sd=SD) * dnorm(x=mu, mean=mu0, sd=SD0)
f_true <- function(xs) sapply(xs, function(x) integrate(f, lower=mu0-6*SD0, upper=mu0+6*SD0, x)$value)
f_en <- function(x) f_true(x)*(-log(f_true(x)))
f_ge <- function(x) f_true(x)*(-log(ifelse(f_pred(x) <= 0, 1e-12, f_pred(x))))
en <- pracma::romberg(f_en, a=mu0-6*SD0, b=mu0+6*SD0)$value
ge <- pracma::romberg(f_ge, a=mu0-6*SD0, b=mu0+6*SD0)$value
looic <- loo::loo(ms$log_lik)$looic/(2*N)
waic <- loo::waic(ms$log_lik)$waic/(2*N)
c(ge=ge, looic=looic, waic=waic, en=en)
}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment