sckott · May 13, 2011 19:46
diff --git a/idata_frame.R b/idata_frame.R
 # Compare plyr's immutable data frame (idata.frame) with a regular
 # data.frame on the `baseball` data set. The speed remarks below are the
 # original author's observations; timings will vary by machine.

 # Load packages. library() stops right away when a package is missing,
 # whereas require() only returns FALSE and lets the script fail later.
 library(plyr)
 library(reshape2)

 # Make immutable data frame
 baseball_i <- idata.frame(baseball)

 # Example 1 - idata.frame more than twice as fast
 system.time(replicate(50, ddply(baseball, "year", summarise, mean(rbi))))
 system.time(replicate(50, ddply(baseball_i, "year", summarise, mean(rbi))))

 # Example 2 - Bummer, colwise() does not work with idata.frames
 colwise(max, is.numeric)(baseball) # works
 # Reported not to work on an idata.frame: report the error instead of
 # aborting, so the remaining examples still run when the file is sourced.
 tryCatch(
   colwise(max, is.numeric)(baseball_i),
   error = function(e) {
     message("colwise() failed on idata.frame: ", conditionMessage(e))
   }
 )

 # Example 3 - idata.frame twice as fast
 system.time(replicate(100, baseball[baseball$year == "1884", ]))
 system.time(replicate(100, baseball_i[baseball_i$year == "1884", ]))

 # Example 4 - idata.frame faster
 # `id.vars` is spelled out rather than relying on partial matching of `id`.
 system.time(replicate(50, melt(baseball[, 1:4], id.vars = 1)))
 system.time(replicate(50, melt(baseball_i[, 1:4], id.vars = 1)))

 # And you can go back to a data frame by
 d <- as.data.frame(baseball_i)
 str(d)


 # idata.frame doesn't work with the doBy package
 library(doBy)
 # Same treatment as Example 2: show the failure and keep going.
 tryCatch(
   summaryBy(rbi ~ year, baseball_i, FUN = c(mean), na.rm = TRUE),
   error = function(e) {
     message("summaryBy() failed on idata.frame: ", conditionMessage(e))
   }
 )

 # But idata.frame works with aggregate in base (but with minimal speed gains)
 # and aggregate is faster than ddply
 system.time(replicate(100, aggregate(rbi ~ year, baseball, mean)))
 system.time(replicate(100, aggregate(rbi ~ year, baseball_i, mean)))
 system.time(replicate(100, ddply(baseball_i, "year", summarise, mean(rbi))))
	# Compare plyr's immutable data frame (idata.frame) with a regular
	# data.frame on the `baseball` data set. The speed remarks below are the
	# original author's observations; timings will vary by machine.

	# Load packages. library() stops right away when a package is missing,
	# whereas require() only returns FALSE and lets the script fail later.
	library(plyr)
	library(reshape2)

	# Make immutable data frame
	baseball_i <- idata.frame(baseball)

	# Example 1 - idata.frame more than twice as fast
	system.time(replicate(50, ddply(baseball, "year", summarise, mean(rbi))))
	system.time(replicate(50, ddply(baseball_i, "year", summarise, mean(rbi))))

	# Example 2 - Bummer, colwise() does not work with idata.frames
	colwise(max, is.numeric)(baseball) # works
	# Reported not to work on an idata.frame: report the error instead of
	# aborting, so the remaining examples still run when the file is sourced.
	tryCatch(
	  colwise(max, is.numeric)(baseball_i),
	  error = function(e) {
	    message("colwise() failed on idata.frame: ", conditionMessage(e))
	  }
	)

	# Example 3 - idata.frame twice as fast
	system.time(replicate(100, baseball[baseball$year == "1884", ]))
	system.time(replicate(100, baseball_i[baseball_i$year == "1884", ]))

	# Example 4 - idata.frame faster
	# `id.vars` is spelled out rather than relying on partial matching of `id`.
	system.time(replicate(50, melt(baseball[, 1:4], id.vars = 1)))
	system.time(replicate(50, melt(baseball_i[, 1:4], id.vars = 1)))

	# And you can go back to a data frame by
	d <- as.data.frame(baseball_i)
	str(d)


	# idata.frame doesn't work with the doBy package
	library(doBy)
	# Same treatment as Example 2: show the failure and keep going.
	tryCatch(
	  summaryBy(rbi ~ year, baseball_i, FUN = c(mean), na.rm = TRUE),
	  error = function(e) {
	    message("summaryBy() failed on idata.frame: ", conditionMessage(e))
	  }
	)

	# But idata.frame works with aggregate in base (but with minimal speed gains)
	# and aggregate is faster than ddply
	system.time(replicate(100, aggregate(rbi ~ year, baseball, mean)))
	system.time(replicate(100, aggregate(rbi ~ year, baseball_i, mean)))
	system.time(replicate(100, ddply(baseball_i, "year", summarise, mean(rbi))))