Created
May 13, 2011 19:46
-
-
Save sckott/971191 to your computer and use it in GitHub Desktop.
Comparison of operations on normal and immutable (see package plyr) data frames
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare the speed of common operations on a regular data frame with the
# same operations on an immutable data frame (plyr::idata.frame), using the
# `baseball` data set that becomes available once plyr is attached.
# The speed remarks below are the original author's observations; actual
# timings depend on the machine and on package versions.

# Load packages ----
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(plyr)
library(reshape2)

# Make immutable data frame ----
baseball_i <- idata.frame(baseball)

# Example 1 - idata.frame more than twice as fast ----
system.time(replicate(50, ddply(baseball, "year", summarise, mean(rbi))))
system.time(replicate(50, ddply(baseball_i, "year", summarise, mean(rbi))))

# Example 2 - Bummer, this does not work with idata.frame's ----
colwise(max, is.numeric)(baseball) # works
# try() reports the expected failure without aborting the rest of the script
# when it is run non-interactively via source() or Rscript.
try(colwise(max, is.numeric)(baseball_i)) # doesn't work

# Example 3 - idata.frame twice as fast ----
# Compare against the number 1884 rather than the string "1884": the string
# form coerces the whole year column to character on every replicate, adding
# overhead unrelated to the subsetting being timed. The selected rows are
# the same.
system.time(replicate(100, baseball[baseball$year == 1884, ]))
system.time(replicate(100, baseball_i[baseball_i$year == 1884, ]))

# Example 4 - idata.frame faster ----
# `id.vars` is spelled out in full instead of relying on partial matching
# of `id`.
system.time(replicate(50, melt(baseball[, 1:4], id.vars = 1)))
system.time(replicate(50, melt(baseball_i[, 1:4], id.vars = 1)))

# And you can go back to a data frame by ----
d <- as.data.frame(baseball_i)
str(d)

# idata.frame doesn't work with the doBy package ----
# doBy is needed only for this demonstration, so it is treated as optional:
# the example is skipped when the package is not installed, and the expected
# failure is wrapped in try() so the aggregate() timings below still run.
if (requireNamespace("doBy", quietly = TRUE)) {
  try(doBy::summaryBy(rbi ~ year, baseball_i, FUN = c(mean), na.rm = TRUE))
} else {
  message("Package 'doBy' is not installed; skipping the summaryBy() example.")
}

# But idata.frame works with aggregate in base (but with minimal speed gains)
# and aggregate is faster than ddply ----
system.time(replicate(100, aggregate(rbi ~ year, baseball, mean)))
system.time(replicate(100, aggregate(rbi ~ year, baseball_i, mean)))
system.time(replicate(100, ddply(baseball_i, "year", summarise, mean(rbi))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment