twolodzko · September 1, 2017 21:35
diff --git a/bayesDensity.R b/bayesDensity.R

 library(extraDistr)

 bins <- function(x, n = 512L) {
  x <- x[!is.na(x)]
  h <- diff(range(x))/(n-4)
  mids <- seq(min(x)-2*h, max(x)+2*h, length.out = n)
  breaks <- c(mids[1L] - h, mids + h)
  counts <- unname(tapply(x, cut(x, breaks), length,
                   default = 0L))
  list(
    mids = mids,
    breaks = breaks,
    counts = counts
  )
 }

 #' Bayesian kernel density estimation
 #'
 #' @param x      numeric vector
 #' @param bw     the smoothing bandwidth to be used
 #' @param m      size of the grid used for calculating density
 #' @param nsim   number of simulated draws
 #' @param n      the number of equally spaced points at which the density is to be estimated
 #' @param alpha  confidence level of the interval
 #' @param na.rm  logical; if \code{TRUE}, missing values are removed from \code{x}.
 #'               If \code{FALSE} any missing values cause an error.
 #' 
 #' @examples
 #' 
 #' x <- c(rnorm(150, 20), rnorm(200, 15))
 #' 
 #' (fit <- bayesDensity(x))
 #' plot(fit)
 #' lines(density(x), col = "red", lty = 2)
 #' rug(x, lwd = 2)
 #' 
 #' x <- mtcars$mpg
 #' 
 #' plot(bayesDensity(x))
 #' lines(density(x), col = "red", lty = 2)
 #' rug(x, lwd = 2)
 #' 
 #' @references
 #' Sibisi, S., & Skilling, J. (1996).
 #' Bayesian Density Estimation.
 #' In Maximum Entropy and Bayesian Methods (pp. 189-198). Springer.
 #' 
 #' @importFrom extraDistr rdirichlet
 #' @importFrom stats dnorm
 #' 
 #' @export

 bayesDensity <- function(x, bw = bw.nrd(x), m = 512, nsim = 5000,
                         n = 512, alpha = 0.95, na.rm = TRUE) {
  
  call <- match.call()
  xnam <- as.character(deparse(substitute(x)))
  N <- length(x)
  
  x.na <- is.na(x)
  if (any(x.na)) {
    if (na.rm) 
      x <- x[!x.na]
    else stop("'x' contains missing values")
  }
  
  x.finite <- is.finite(x)
  if (any(!x.finite))
    x <- x[x.finite]
  
  bs <- bins(x, m)
  grid <- seq(min(x)-4*bw, max(x)+4*bw, length.out = n)
  
  K <- outer(grid, bs$mids, function(y, mu) dnorm(y, mu, bw))
  phi <- rdirichlet(nsim, as.numeric(1/m + bs$counts))
  
  sim <- phi %*% t(K)
  quant <- apply(sim, 2, quantile, c((1-alpha)/2, 0.5, 1-(1-alpha)/2))
  mean <- colMeans(sim)
  sd <- apply(sim, 2, sd)
  
  structure(
    list(
      x = grid,
      y = mean,
      low = quant[1, ],
      high = quant[3, ],
      median = quant[2, ],
      mean = mean,
      sd = sd,
      m = m,
      n = N,
      bw = bw,
      nsim = nsim,
      alpha = alpha,
      call = call,
      data.name = xnam,
      hist = bins,
      has.na = FALSE
    ), class = c("bayesDensity", "density")
  )
  
 }

 plot.bayesDensity <- function(x, main = NULL, xlab = NULL,
                              ylab = "Density", type = "l",
                              zero.line = TRUE, ...) {
  if (is.null(xlab))
    xlab <- paste("N =", x$n, "  Bandwidth =", formatC(x$bw))
  if (is.null(main))
    main <- deparse(x$call)
  plot.default(x, main = main, xlab = xlab, ylab = ylab, type = type,
               ylim = c(0, max(x$high)), ...)
  if (zero.line)
    abline(h = 0, lwd = 0.1, col = "gray")

  lines(x$x, x$low,  col = "gray", lty = 2)
  lines(x$x, x$high, col = "gray", lty = 2)

  invisible(NULL)
 }

	library(extraDistr)

	bins <- function(x, n = 512L) {
	x <- x[!is.na(x)]
	h <- diff(range(x))/(n-4)
	mids <- seq(min(x)-2h, max(x)+2h, length.out = n)
	breaks <- c(mids[1L] - h, mids + h)
	counts <- unname(tapply(x, cut(x, breaks), length,
	default = 0L))
	list(
	mids = mids,
	breaks = breaks,
	counts = counts
	)
	}

	#' Bayesian kernel density estimation
	#'
	#' @param x numeric vector
	#' @param bw the smoothing bandwidth to be used
	#' @param m size of the grid used for calculating density
	#' @param nsim number of simulated draws
	#' @param n the number of equally spaced points at which the density is to be estimated
	#' @param alpha confidence level of the interval
	#' @param na.rm logical; if \code{TRUE}, missing values are removed from \code{x}.
	#' If \code{FALSE} any missing values cause an error.
	#'
	#' @examples
	#'
	#' x <- c(rnorm(150, 20), rnorm(200, 15))
	#'
	#' (fit <- bayesDensity(x))
	#' plot(fit)
	#' lines(density(x), col = "red", lty = 2)
	#' rug(x, lwd = 2)
	#'
	#' x <- mtcars$mpg
	#'
	#' plot(bayesDensity(x))
	#' lines(density(x), col = "red", lty = 2)
	#' rug(x, lwd = 2)
	#'
	#' @references
	#' Sibisi, S., & Skilling, J. (1996).
	#' Bayesian Density Estimation.
	#' In Maximum Entropy and Bayesian Methods (pp. 189-198). Springer.
	#'
	#' @importFrom extraDistr rdirichlet
	#' @importFrom stats dnorm
	#'
	#' @export

	bayesDensity <- function(x, bw = bw.nrd(x), m = 512, nsim = 5000,
	n = 512, alpha = 0.95, na.rm = TRUE) {

	call <- match.call()
	xnam <- as.character(deparse(substitute(x)))
	N <- length(x)

	x.na <- is.na(x)
	if (any(x.na)) {
	if (na.rm)
	x <- x[!x.na]
	else stop("'x' contains missing values")
	}

	x.finite <- is.finite(x)
	if (any(!x.finite))
	x <- x[x.finite]

	bs <- bins(x, m)
	grid <- seq(min(x)-4bw, max(x)+4bw, length.out = n)

	K <- outer(grid, bs$mids, function(y, mu) dnorm(y, mu, bw))
	phi <- rdirichlet(nsim, as.numeric(1/m + bs$counts))

	sim <- phi %*% t(K)
	quant <- apply(sim, 2, quantile, c((1-alpha)/2, 0.5, 1-(1-alpha)/2))
	mean <- colMeans(sim)
	sd <- apply(sim, 2, sd)

	structure(
	list(
	x = grid,
	y = mean,
	low = quant[1, ],
	high = quant[3, ],
	median = quant[2, ],
	mean = mean,
	sd = sd,
	m = m,
	n = N,
	bw = bw,
	nsim = nsim,
	alpha = alpha,
	call = call,
	data.name = xnam,
	hist = bins,
	has.na = FALSE
	), class = c("bayesDensity", "density")
	)

	}

	plot.bayesDensity <- function(x, main = NULL, xlab = NULL,
	ylab = "Density", type = "l",
	zero.line = TRUE, ...) {
	if (is.null(xlab))
	xlab <- paste("N =", x$n, " Bandwidth =", formatC(x$bw))
	if (is.null(main))
	main <- deparse(x$call)
	plot.default(x, main = main, xlab = xlab, ylab = ylab, type = type,
	ylim = c(0, max(x$high)), ...)
	if (zero.line)
	abline(h = 0, lwd = 0.1, col = "gray")

	lines(x$x, x$low, col = "gray", lty = 2)
	lines(x$x, x$high, col = "gray", lty = 2)

	invisible(NULL)
	}