Created
December 7, 2017 14:26
-
-
Save unaoya/308edbde04b774e49287026a0a969b16 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(MASS) | |
library(rstan) | |
rstan_options(auto_write = TRUE) | |
options(mc.cores = parallel::detectCores()) | |
waic <- function(loglik){ | |
te <- -mean(log(colMeans(exp(log_lik)))) | |
fv <- mean(colMeans(log_lik^2) - colMeans(log_lik)^2) | |
waic <- te + fv | |
return(waic) | |
} | |
wbic <- function(loglik){ | |
wbic <- -mean(rowSums(log_lik)) | |
return(wbic) | |
} | |
rng1 <- function(N, mu1, mu2, Sigma1, Sigma2){ | |
y1 <- mvrnorm(n = 0.5 * N, mu = mu1, Sigma = Sigma1) | |
y2 <- mvrnorm(n = 0.5 * N, mu = mu2, Sigma = Sigma2) | |
y <- rbind(y1, y2) | |
d <- list(N = N, y = y) | |
return(d) | |
} | |
rng2 <- function(N, mu, Sigma0, Sigma1, Sigma2){ | |
y0 <- mvrnorm(n = 0.5 * N, mu = mu, Sigma = Sigma0) | |
y1 <- mvrnorm(n = 0.25 * N, mu = mu, Sigma = Sigma1) | |
y2 <- mvrnorm(n = 0.25 * N, mu = mu, Sigma = Sigma2) | |
y <- rbind(y0, y1, y2) | |
d <- list(N = N, y = y) | |
return(d) | |
} | |
#真の分布のパラメータ | |
N <- 30 | |
mu1 <- c(0.1,0) | |
mu2 <- c(0,0.1) | |
Sigma0 <- matrix(data = c(1,0,0,1), nrow = 2) | |
Sigma1 <- matrix(data = c(2,0,0,0.1), nrow = 2) | |
Sigma2 <- matrix(data = c(0.1,0,0,2), nrow = 2) | |
iter <- 100 | |
results <- data.frame(waic1 = rep(0,iter), | |
wbic1 = rep(0,iter), | |
waic2 = rep(0,iter), | |
wbic2 = rep(0,iter)) | |
for(i in 1:iter){ | |
d <- rng1(N, mu1, mu2, Sigma1, Sigma2) | |
fit <- stan(file = 'model1.stan', data = d, | |
iter = 6000, warmup = 3000) | |
log_lik <- extract(fit)$log_likelihood | |
results$waic1[i] <- waic(log_lik) | |
results$wbic1[i] <- wbic(log_lik) | |
fit2 <- stan(file = 'model2.stan', data = d, | |
iter = 6000, warmup = 3000) | |
log_lik <- extract(fit2)$log_likelihood | |
results$waic2[i] <- waic(log_lik) | |
results$wbic2[i] <- wbic(log_lik) | |
print(i) | |
} | |
results | |
fit | |
fit2 | |
stan_trace(fit2, pars = "Sigma1") | |
stan_trace(fit2, pars = "Sigma2") | |
N <- 100 | |
iter <- 30 | |
results100 <- data.frame(waic1 = rep(0,iter), | |
wbic1 = rep(0,iter), | |
waic2 = rep(0,iter), | |
wbic2 = rep(0,iter)) | |
for(i in 1:iter){ | |
d <- rng1(N, mu1, mu2, Sigma1, Sigma2) | |
fit <- stan(file = 'model1.stan', data = d, | |
iter = 6000, warmup = 3000) | |
log_lik <- extract(fit)$log_likelihood | |
results100$waic1[i] <- waic(log_lik) | |
results100$wbic1[i] <- wbic(log_lik) | |
fit2 <- stan(file = 'model2.stan', data = d, | |
iter = 6000, warmup = 3000) | |
log_lik <- extract(fit2)$log_likelihood | |
results100$waic2[i] <- waic(log_lik) | |
results100$wbic2[i] <- wbic(log_lik) | |
print(i) | |
} | |
select.waic <- c() | |
for(i in 1:50){ | |
select.waic[i] <- ifelse(results$waic1[i] < results$waic2[i], 1, 2) | |
} | |
select.wbic <- c() | |
for(i in 1:50){ | |
select.wbic[i] <- ifelse(results$wbic1[i] < results$wbic2[i], 1, 2) | |
} | |
library(ggplot2) | |
library(reshape2) | |
df100 <- read.csv(file = "result100.csv") | |
df30 <- read.csv(file = "result30.csv") | |
sum(df100$select.waic==1) | |
sum(df100$select.wbic==1) | |
sum(df30$select.waic==1) | |
sum(df30$select.wbic==1) | |
df.waic.30 <- data.frame(model1 = df30[,2], model2 = df30[,4], samples = as.factor(rep(30, 100))) | |
df.waic.100 <- data.frame(model1 = df100[,2], model2 = df100[,4], samples = as.factor(rep(100, 50))) | |
df.wbic.30 <- data.frame(model1 = df30[,3], model2 = df30[,5], samples = as.factor(rep(30, 100))) | |
df.wbic.100 <- data.frame(model1 = df100[,3], model2 = df100[,5], samples = as.factor(rep(100, 50))) | |
df.waic <- rbind(melt(df.waic.30), melt(df.waic.100)) | |
df.wbic <- rbind(melt(df.wbic.30), melt(df.wbic.100)) | |
df.wbic | |
g.waic <- ggplot(df.waic, aes(x = samples, y = value)) | |
g.waic <- g.waic + geom_boxplot(aes(colour = variable)) | |
g.waic <- g.waic + ggtitle("WAIC") | |
plot(g.waic) | |
g.wbic <- ggplot(df.wbic, aes(x = samples, y = value)) | |
g.wbic <- g.wbic + geom_boxplot(aes(colour = variable)) | |
g.wbic <- g.wbic + ggtitle("WBIC") | |
plot(g.wbic) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//model1, クラスタ数1のモデル | |
data{ | |
int N; //サンプル数 | |
vector[2] y[N]; //観測値 | |
} | |
parameters{ | |
vector[2] mu; //平均 | |
cov_matrix[2] Sigma; //分散 | |
} | |
model{ | |
//事前分布 | |
for(i in 1:2){ | |
mu[i] ~ normal(0,1); | |
} | |
//負の対数尤度 | |
for(n in 1:N){ | |
target += -log(determinant(Sigma)) - 0.5 * (y[n] - mu)' * inverse(Sigma) * (y[n] - mu); //定数は省略 | |
} | |
} | |
generated quantities{ | |
vector[N] log_likelihood; | |
for(n in 1:N){ | |
log_likelihood[n] = -log(determinant(Sigma)) - 0.5 * (y[n] - mu)' * inverse(Sigma) * (y[n] - mu); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data{ | |
int N; //サンプル数 | |
vector[2] y[N]; //観測値 | |
} | |
parameters{ | |
vector[2] mu1; //クラスタ1の平均 | |
cov_matrix[2] Sigma1; //クラスタ1の分散 | |
vector[2] mu2; //クラスタ2の平均 | |
cov_matrix[2] Sigma2; //クラスタ2の分散 | |
} | |
transformed parameters{ | |
real<upper=0> soft_z[N, 2]; // log unnormalized clusters | |
for(n in 1:N){ | |
soft_z[n, 1] = -log(2) - log(determinant(Sigma1)) - 0.5 * (y[n] - mu1)' * inverse(Sigma1) * (y[n] - mu1); //定数は省略 | |
soft_z[n, 2] = -log(2) - log(determinant(Sigma2)) - 0.5 * (y[n] - mu2)' * inverse(Sigma2) * (y[n] - mu2); //定数は省略 | |
} | |
} | |
model{ | |
//事前分布 | |
for(i in 1:2){ | |
mu1[i] ~ normal(0,1); | |
} | |
for(i in 1:2){ | |
mu2[i] ~ normal(0,1); | |
} | |
//負の対数尤度 | |
for(n in 1:N){ | |
target += log_sum_exp(soft_z[n]); | |
} | |
} | |
generated quantities{ | |
vector[N] log_likelihood; | |
for(n in 1:N){ | |
log_likelihood[n] = log_sum_exp(soft_z[n]); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment