Last active
September 1, 2017 21:35
-
-
Save twolodzko/4f6d7e8dbcabc8c9e67b061eff509c0d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(extraDistr) | |
bins <- function(x, n = 512L) { | |
x <- x[!is.na(x)] | |
h <- diff(range(x))/(n-4) | |
mids <- seq(min(x)-2*h, max(x)+2*h, length.out = n) | |
breaks <- c(mids[1L] - h, mids + h) | |
counts <- unname(tapply(x, cut(x, breaks), length, | |
default = 0L)) | |
list( | |
mids = mids, | |
breaks = breaks, | |
counts = counts | |
) | |
} | |
#' Bayesian kernel density estimation | |
#' | |
#' @param x numeric vector | |
#' @param bw the smoothing bandwidth to be used | |
#' @param m size of the grid used for calculating density | |
#' @param nsim number of simulated draws | |
#' @param n the number of equally spaced points at which the density is to be estimated | |
#' @param alpha confidence level of the interval | |
#' @param na.rm logical; if \code{TRUE}, missing values are removed from \code{x}. | |
#' If \code{FALSE} any missing values cause an error. | |
#' | |
#' @examples | |
#' | |
#' x <- c(rnorm(150, 20), rnorm(200, 15)) | |
#' | |
#' (fit <- bayesDensity(x)) | |
#' plot(fit) | |
#' lines(density(x), col = "red", lty = 2) | |
#' rug(x, lwd = 2) | |
#' | |
#' x <- mtcars$mpg | |
#' | |
#' plot(bayesDensity(x)) | |
#' lines(density(x), col = "red", lty = 2) | |
#' rug(x, lwd = 2) | |
#' | |
#' @references | |
#' Sibisi, S., & Skilling, J. (1996). | |
#' Bayesian Density Estimation. | |
#' In Maximum Entropy and Bayesian Methods (pp. 189-198). Springer. | |
#' | |
#' @importFrom extraDistr rdirichlet | |
#' @importFrom stats dnorm | |
#' | |
#' @export | |
bayesDensity <- function(x, bw = bw.nrd(x), m = 512, nsim = 5000, | |
n = 512, alpha = 0.95, na.rm = TRUE) { | |
call <- match.call() | |
xnam <- as.character(deparse(substitute(x))) | |
N <- length(x) | |
x.na <- is.na(x) | |
if (any(x.na)) { | |
if (na.rm) | |
x <- x[!x.na] | |
else stop("'x' contains missing values") | |
} | |
x.finite <- is.finite(x) | |
if (any(!x.finite)) | |
x <- x[x.finite] | |
bs <- bins(x, m) | |
grid <- seq(min(x)-4*bw, max(x)+4*bw, length.out = n) | |
K <- outer(grid, bs$mids, function(y, mu) dnorm(y, mu, bw)) | |
phi <- rdirichlet(nsim, as.numeric(1/m + bs$counts)) | |
sim <- phi %*% t(K) | |
quant <- apply(sim, 2, quantile, c((1-alpha)/2, 0.5, 1-(1-alpha)/2)) | |
mean <- colMeans(sim) | |
sd <- apply(sim, 2, sd) | |
structure( | |
list( | |
x = grid, | |
y = mean, | |
low = quant[1, ], | |
high = quant[3, ], | |
median = quant[2, ], | |
mean = mean, | |
sd = sd, | |
m = m, | |
n = N, | |
bw = bw, | |
nsim = nsim, | |
alpha = alpha, | |
call = call, | |
data.name = xnam, | |
hist = bins, | |
has.na = FALSE | |
), class = c("bayesDensity", "density") | |
) | |
} | |
plot.bayesDensity <- function(x, main = NULL, xlab = NULL, | |
ylab = "Density", type = "l", | |
zero.line = TRUE, ...) { | |
if (is.null(xlab)) | |
xlab <- paste("N =", x$n, " Bandwidth =", formatC(x$bw)) | |
if (is.null(main)) | |
main <- deparse(x$call) | |
plot.default(x, main = main, xlab = xlab, ylab = ylab, type = type, | |
ylim = c(0, max(x$high)), ...) | |
if (zero.line) | |
abline(h = 0, lwd = 0.1, col = "gray") | |
lines(x$x, x$low, col = "gray", lty = 2) | |
lines(x$x, x$high, col = "gray", lty = 2) | |
invisible(NULL) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment