hadley · June 22, 2012 14:06
diff --git a/discrete-rv.r b/discrete-rv.r
 # Inspired by Andreas Buja's code from 
 # http://stat.wharton.upenn.edu/~buja/STAT-101/src-probability.R

 rv <- function(x, probs = NULL) {
  if (is.null(probs)) {
    probs <- rep(1, length(x)) / length(x)
  }  
  
  vals <- as.numeric(x)
  ord <- order(vals)
  vals <- vals[ord]
  probs <- probs[ord]
  
  # Simplify by summing probabilities with equal x's
  group <- cumsum(c(TRUE, vals[-1] != vals[-length(x)]))
  vals <- as.vector(tapply(as.numeric(vals), group, "[", 1))
  probs <- as.vector(tapply(probs, group, sum))
  
  structure(vals, probs = probs, class = "rv")
 }
 is.rv <- function(x) inherits(x, "rv")
 probs <- function(x) attr(x, "probs")

 print.rv <- function(x) {
  X <- format(x, digits = 3)
  P <- format(probs(x), digits = 3)
  out <- cbind(X = X, "P(X)" = P)
  rownames(out) <- rep("", nrow(out))
  print(out, quote = FALSE)
 }

 Ops.rv <- function(e1, e2) {
  # Assume that two random variables are independent
  if (is.rv(e1) && is.rv(e2)) {
    vals <- as.vector(outer(e1, e2, .Generic))
    probs <- as.vector(outer(probs(e1), probs(e2), "*"))
    return(rv(vals, probs))
  }
  
  # Figure out which one is the rv, and which one is the number
  if (is.rv(e1)) {
    rv <- e1
    n <- e2
  } else {
    rv <- e2
    n <- e1
  }
  rv(NextMethod(), probs(rv))
 }

 "[.rv" <- function(x, i, ...) {
  rv(as.numeric(x)[i], prop.table(probs(x)[i]))
 }

 Math.rv <- function(x, ...) {
  rv(NextMethod(), probs(x))
 }

 plot.rv <- function(x, ...) {
  name <- deparse(substitute(x))
  ylim <- range(0, probs(x))
  
  plot(as.numeric(x), probs(x), type = "h", ylim = ylim,
    xlab = name, ylab = paste0("P(", name, ")"))
  points(as.numeric(x), probs(x), pch = 20)
  abline(h = 0, col = "gray")  
 }

 P <- function(x) {
  stopifnot(is.logical(x))
  sum(probs(x)[x])
 }
 E <- function(x) sum(as.numeric(x) * probs(x))
 # Subtle error:
 # E <- function(x) sum(x * probs(x))
 VAR <- function(x) E((x - E(x)) ^ 2)
 # Var <- function(x) E(x - E(x)) ^ 2
 SD <- function(x) sqrt(VAR(x))
 SKEW <- function(x) E((x - E(x)) ^ 3) / SD(x) ^ 3

 # Simulate n random numbers from the rv
 rsim <- function(x, n) {
  sample(x, n, prob = probs(x), replace = TRUE)
 }

 # rif(dice > 3, -1, 5)
 # How could you extend it so that you could do:
 # rif(dice > 3, 2 * dice, -4)
 rif <- function(x, yes, no) {
  stopifnot(is.numeric(yes), length(yes) == 1)
  stopifnot(is.numeric(no), length(no) == 1)

  rv(c(no, yes), probs(x))
 }

 # Standardise by subtracting off expectation and dividing by sd 
 # (often called a z score)
 Z <- function(x) (x - E(x)) / SD(x)

 # coin <- rv(c(0, 1))
 # dice <- rv(1:6)
 # plot(dice)
 # plot(dice + dice)
 # P(dice > dice)
 # P(dice < dice)
 # 
 # # Flip a coin, and if heads roll a dice to determine your winnings
 # E(coin * dice)
 # plot(coin * dice)
 # 
 # loaded <- rv(1:6, prop.table(c(1,1,1,1,2,4)))
	# Inspired by Andreas Buja's code from
	# http://stat.wharton.upenn.edu/~buja/STAT-101/src-probability.R

	rv <- function(x, probs = NULL) {
	if (is.null(probs)) {
	probs <- rep(1, length(x)) / length(x)
	}

	vals <- as.numeric(x)
	ord <- order(vals)
	vals <- vals[ord]
	probs <- probs[ord]

	# Simplify by summing probabilities with equal x's
	group <- cumsum(c(TRUE, vals[-1] != vals[-length(x)]))
	vals <- as.vector(tapply(as.numeric(vals), group, "[", 1))
	probs <- as.vector(tapply(probs, group, sum))

	structure(vals, probs = probs, class = "rv")
	}
	is.rv <- function(x) inherits(x, "rv")
	probs <- function(x) attr(x, "probs")

	print.rv <- function(x) {
	X <- format(x, digits = 3)
	P <- format(probs(x), digits = 3)
	out <- cbind(X = X, "P(X)" = P)
	rownames(out) <- rep("", nrow(out))
	print(out, quote = FALSE)
	}

	Ops.rv <- function(e1, e2) {
	# Assume that two random variables are independent
	if (is.rv(e1) && is.rv(e2)) {
	vals <- as.vector(outer(e1, e2, .Generic))
	probs <- as.vector(outer(probs(e1), probs(e2), "*"))
	return(rv(vals, probs))
	}

	# Figure out which one is the rv, and which one is the number
	if (is.rv(e1)) {
	rv <- e1
	n <- e2
	} else {
	rv <- e2
	n <- e1
	}
	rv(NextMethod(), probs(rv))
	}

	"[.rv" <- function(x, i, ...) {
	rv(as.numeric(x)[i], prop.table(probs(x)[i]))
	}

	Math.rv <- function(x, ...) {
	rv(NextMethod(), probs(x))
	}

	plot.rv <- function(x, ...) {
	name <- deparse(substitute(x))
	ylim <- range(0, probs(x))

	plot(as.numeric(x), probs(x), type = "h", ylim = ylim,
	xlab = name, ylab = paste0("P(", name, ")"))
	points(as.numeric(x), probs(x), pch = 20)
	abline(h = 0, col = "gray")
	}

	P <- function(x) {
	stopifnot(is.logical(x))
	sum(probs(x)[x])
	}
	E <- function(x) sum(as.numeric(x) * probs(x))
	# Subtle error:
	# E <- function(x) sum(x * probs(x))
	VAR <- function(x) E((x - E(x)) ^ 2)
	# Var <- function(x) E(x - E(x)) ^ 2
	SD <- function(x) sqrt(VAR(x))
	SKEW <- function(x) E((x - E(x)) ^ 3) / SD(x) ^ 3

	# Simulate n random numbers from the rv
	rsim <- function(x, n) {
	sample(x, n, prob = probs(x), replace = TRUE)
	}

	# rif(dice > 3, -1, 5)
	# How could you extend it so that you could do:
	# rif(dice > 3, 2 * dice, -4)
	rif <- function(x, yes, no) {
	stopifnot(is.numeric(yes), length(yes) == 1)
	stopifnot(is.numeric(no), length(no) == 1)

	rv(c(no, yes), probs(x))
	}

	# Standardise by subtracting off expectation and dividing by sd
	# (often called a z score)
	Z <- function(x) (x - E(x)) / SD(x)

	# coin <- rv(c(0, 1))
	# dice <- rv(1:6)
	# plot(dice)
	# plot(dice + dice)
	# P(dice > dice)
	# P(dice < dice)
	#
	# # Flip a coin, and if heads roll a dice to determine your winnings
	# E(coin * dice)
	# plot(coin * dice)
	#
	# loaded <- rv(1:6, prop.table(c(1,1,1,1,2,4)))