Skip to content

Instantly share code, notes, and snippets.

@dantonnoriega
Last active October 17, 2018 22:43
Show Gist options
  • Save dantonnoriega/1023bb0b4de0ac0ebb759eec4d4bd902 to your computer and use it in GitHub Desktop.
Save dantonnoriega/1023bb0b4de0ac0ebb759eec4d4bd902 to your computer and use it in GitHub Desktop.
# recreate the code from https://arxiv.org/pdf/1808.06399.pdf using greta (https://greta-dev.github.io/greta/)
library("DirichletReg")
Bld <- BloodSamples
Bld <- na.omit(Bld)
Bld$Smp <- DR_data(Bld[, 1:4])
# simplex function. applies simplex to each row of a matrix.
# HUGE speed gains using matrix mat vs for loop!
simplex_mat <- function(x){
exp_x <- exp(x)
greta::sweep(exp_x, 1, greta::rowSums(exp_x), FUN = "/")
}
# using greta
# !! requires the development version to run!
# devtools::install_github("greta-dev/greta@dev")
# convert data to matrix then greta data
Y <- matrix(Bld$Smp, ncol = ncol(Bld$Smp))
Y <- greta::as_data(Y)
X <- as.matrix(model.matrix(lm(Albumin ~ Disease, data = Bld)))
X <- matrix(nrow = nrow(X), ncol = ncol(X), data = as.numeric(X))
sd_prior = 1
theta <- greta::normal(0, sd_prior) # theta ~ normal(0, sd_prior); (scaling constant)
# greta lets you build data arrays then fill in with variable/operation values but not the other direction
beta <- greta::zeros(ncol(X), ncol(Y)) # initialize regression coefficients
# leave last column as zeros to normalize (see paper for motivation)
beta[ , 1:(ncol(Y) - 1)] <- greta::normal(0, sd_prior, dim = c(ncol(X), ncol(Y) - 1))
alpha <- greta::`%*%`(X, beta)
greta::distribution(Y) = greta::dirichlet(simplex_mat(alpha)*exp(theta)) # simplex via matrix operations
m <- greta::model(beta)
draws <- greta::mcmc(m, chains = 4, n_samples = 2000, warmup = 1000)
# split up draws --------------
# greta outputs a single matrix of params
# need to split them up accordingly
P <- do.call(rbind, draws) # stack all chains
nms <- colnames(P) # get row names
# row 1 of beta estimates (untransformed)
B1 <- P %>% .[, grepl('1,.', nms)] # extract names with index [1,j]
# row 2 of beta estimates (untransformed)
B2 <- P %>% .[, grepl('2,.', nms)] # extract names with index [2,j
# plots --------------
my_colors <- scales::hue_pal()(4)
# disease A
aux <- simplex_mat(B1)
layout(matrix(1:2, ncol = 2))
plot(1:4, Bld[1, 1:4], ylim = c(0, 0.6), type = "n", xaxt = "n", las = 1,
xlab = "", ylab = "Proportion", main = "Disease A", xlim = c(0.6, 4.4))
abline(h = seq(0, 0.6, by = 0.1), col = "grey", lty = 3)
axis(1, at = 1:4, labels = names(Bld)[1:4], las = 2)
apply(subset(Bld, Disease == "A")[, 1:4], MAR = 1, FUN = points, pch = 16,
col = "grey")
lines(apply(subset(Bld, Disease == "A")[, 1:4], MAR = 2, FUN = mean),
type = "b", pch = 16, cex = 1.2, lwd = 2)
lines(apply(aux, MAR = 2, FUN = quantile, prob = 0.975), type = "b", pch = 4,
lty = 2, col = my_colors[1])
lines(apply(aux, MAR = 2, FUN = quantile, prob = 0.025), type = "b", pch = 4,
lty = 2, col = my_colors[1])
lines(apply(aux, MAR = 2, FUN = mean), lwd = 2, col = my_colors[1], type = "b",
pch = 16)
plot(1:4, Bld[1, 1:4], ylim = c(0, 0.6), type = "n", xaxt = "n", las = 1,
xlab = "", ylab = "Proportion", main = "Disease B", xlim = c(0.6, 4.4))
abline(h = seq(0, 0.6, by = 0.1), col = "grey", lty = 3)
axis(1, at = 1:4, labels = names(Bld)[1:4], las = 2)
# disease B estimates
aux <- simplex_mat(B1 + B2) # simplex of the sum of coefficients
apply(subset(Bld, Disease == "B")[, 1:4], MAR = 1, FUN = points, pch = 16,
col = "grey")
lines(apply(subset(Bld, Disease == "B")[, 1:4], MAR = 2, FUN = mean),
type = "b", pch = 16, cex = 1.2, lwd = 2)
lines(apply(aux, MAR = 2, FUN = quantile, prob = 0.975), type = "b", pch = 4,
lty = 2, col = my_colors[2])
lines(apply(aux, MAR = 2, FUN = quantile, prob = 0.025), type = "b", pch = 4,
lty = 2, col = my_colors[2])
lines(apply(aux, MAR = 2, FUN = mean), lwd = 2, col = my_colors[2], type = "b",
pch = 16)
layout(1)
@dantonnoriega
Copy link
Author

Any chance you can provide a reprex using imultilogit? I cannot get it to work (and perhaps am misunderstanding how it should work).

# using greta
# !! requires the development version to run!
# devtools::install_github("greta-dev/greta@dev")
# convert data to matrix then greta data
Y <- matrix(Bld$Smp, ncol = ncol(Bld$Smp))
Y <- greta::as_data(Y)
X <- as.matrix(model.matrix(lm(Albumin ~ Disease, data = Bld)))
X <- matrix(nrow = nrow(X), ncol = ncol(X), data = as.numeric(X))
sd_prior = 1
theta <- greta::normal(0, sd_prior) # theta ~ normal(0, sd_prior); (scaling constant)
# greta lets you build data arrays then fill in with variable/operation values but not the other direction
# leave last column as zeros to normalize (see paper for motivation)
beta <- greta::normal(0, sd_prior, dim = c(ncol(X), ncol(Y) - 1))
alpha <- greta::`%*%`(X, beta)
alpha <- greta::imultilogit(alpha)
greta::distribution(Y) = greta::dirichlet(alpha*exp(theta)) # simplex via matrix operations
m <- greta::model(beta)
draws <- greta::mcmc(m, chains = 4, n_samples = 2000, warmup = 1000)

Yields

running 4 chains simultaneously on up to 4 cores

    warmup                                           0/1000 | eta:  ?s          Error: greta hit a tensorflow error:

Error in py_call_impl(callable, dots$args, dots$keywords): InvalidArgumentError: In[0].dim(0) and In[1].dim(0) must be the same: [1,30,2] vs [4,2,3]
	 [[Node: mcmc_sample_chain/mh_bootstrap_results/hmc_kernel_bootstrap_results/maybe_call_fn_and_grads/value_and_gradients/MatMul = BatchMatMul[T=DT_DOUBLE, adj_x=false, adj_y=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_Placeholder_3_0_5, mcmc_sample_chain/mh_bootstrap_results/hmc_kernel_bootstrap_results/maybe_call_fn_and_grads/value_and_gradients/Reshape)]]

Caused by op 'mcmc_sample_chain/mh_bootstrap_results/hmc_kernel_bootstrap_results/maybe_call_fn_and_grads/value_and_gradients/MatMul', defined at:
  File "/Users/danton/anaconda3/envs/r-tensorflow/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/sample.py", line 240, in sample_chain
    previous_kernel_results = kernel.bootstrap_results(current_state)
  File "/Users/danton/anaconda3/envs/r-tensorflow/lib/pyth

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment