nassimhaddad · December 13, 2015 23:48
diff --git a/README.md b/README.md
diff --git a/super_fast_group_by.R b/super_fast_group_by.R
 # Build a synthetic data set: four integer grouping keys plus a numeric
 # value column (myFact) and a numeric weight column (weights).
 n_rows <- 1e6
 year <- sample(1970:2008, n_rows, replace = TRUE)
 state <- sample(1:50, n_rows, replace = TRUE)
 group1 <- sample(1:6, n_rows, replace = TRUE)
 group2 <- sample(1:3, n_rows, replace = TRUE)
 # One draw per row (mean 15, sd 100). The earlier call rnorm(100, 15, 1e6)
 # produced only 100 values, which data.frame() silently recycled.
 myFact <- rnorm(n_rows, mean = 15, sd = 100)
 weights <- rnorm(n_rows)
 myDF <- data.frame(year, state, group1, group2, myFact, weights)


 library(plyr)
 # Columns that jointly define a group.
 group_cols <- c("year", "state", "group1", "group2")

 # Time the whole grouped weighted-mean computation.
 system.time({
   # One integer group id per row; the group count is stored in attribute "n".
   ids <- id(myDF[group_cols], drop = TRUE)
   n_groups <- attr(ids, "n")

   # A plain numeric matrix is cheaper to subset by row than a data frame.
   data <- as.matrix(myDF[c("myFact", "weights")])

   # plyr >= 1.8 takes the group ids directly and returns, per group, the row
   # positions that belong to it. The old (index, group, n) form was removed.
   indices <- plyr:::split_indices(ids, n = n_groups)

   # Weighted mean of column 1 (myFact) using column 2 (weights) for one group.
   fun <- function(rows) {
     weighted.mean(data[rows, 1], data[rows, 2])
   }
   values <- vapply(indices, fun, numeric(1))

   # Key columns taken from the first row found for each group id 1..n_groups.
   labels <- myDF[match(seq_len(n_groups), ids), group_cols]
   aggregateDF <- cbind(labels, values)
 })
	# Build a synthetic data set: four integer grouping keys plus a numeric
	# value column (myFact) and a numeric weight column (weights).
	n_rows <- 1e6
	year <- sample(1970:2008, n_rows, replace = TRUE)
	state <- sample(1:50, n_rows, replace = TRUE)
	group1 <- sample(1:6, n_rows, replace = TRUE)
	group2 <- sample(1:3, n_rows, replace = TRUE)
	# One draw per row (mean 15, sd 100). The earlier call rnorm(100, 15, 1e6)
	# produced only 100 values, which data.frame() silently recycled.
	myFact <- rnorm(n_rows, mean = 15, sd = 100)
	weights <- rnorm(n_rows)
	myDF <- data.frame(year, state, group1, group2, myFact, weights)


	library(plyr)
	# Columns that jointly define a group.
	group_cols <- c("year", "state", "group1", "group2")

	# Time the whole grouped weighted-mean computation.
	system.time({
	  # One integer group id per row; the group count is stored in attribute "n".
	  ids <- id(myDF[group_cols], drop = TRUE)
	  n_groups <- attr(ids, "n")

	  # A plain numeric matrix is cheaper to subset by row than a data frame.
	  data <- as.matrix(myDF[c("myFact", "weights")])

	  # plyr >= 1.8 takes the group ids directly and returns, per group, the row
	  # positions that belong to it. The old (index, group, n) form was removed.
	  indices <- plyr:::split_indices(ids, n = n_groups)

	  # Weighted mean of column 1 (myFact) using column 2 (weights) for one group.
	  fun <- function(rows) {
	    weighted.mean(data[rows, 1], data[rows, 2])
	  }
	  values <- vapply(indices, fun, numeric(1))

	  # Key columns taken from the first row found for each group id 1..n_groups.
	  labels <- myDF[match(seq_len(n_groups), ids), group_cols]
	  aggregateDF <- cbind(labels, values)
	})