jackmaney · December 8, 2014 23:56
diff --git a/gistfile1.r b/gistfile1.r
 # Build a small example data set: an id column to group on, plus two
 # numeric columns to summarise.
 df <- data.frame(
   id   = c(1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4),
   var1 = c(-2.1, 1.1, 3.14, 2.17828, 9, 10, 8, 2, 1, 0, -1),
   var2 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
 )

 # Printing df at the interactive prompt (the REPL, short for
 # Read-Evaluate-Print Loop) shows:
 # > df
 #    id     var1 var2
 # 1   1 -2.10000    1
 # 2   1  1.10000    2
 # 3   1  3.14000    3
 # 4   2  2.17828    4
 # 5   2  9.00000    5
 # 6   3 10.00000    6
 # 7   3  8.00000    7
 # 8   3  2.00000    8
 # 9   4  1.00000    9
 # 10  4  0.00000   10
 # 11  4 -1.00000   11

 # Per-group means come from aggregate():
 #   * by = list(df$id) forms one group per distinct id value;
 #   * FUN = mean is applied to every column of df within each group.
 means <- aggregate(x = df, by = list(df$id), FUN = mean)

 # Because the whole of df is passed in, the id column is averaged as well.
 # The result therefore holds the grouping key (Group.1) next to the
 # per-group mean of id, which equals that same key:
 # > means
 #   Group.1 id      var1 var2
 # 1       1  1 0.7133333  2.0
 # 2       2  2 5.5891400  4.5
 # 3       3  3 6.6666667  7.0
 # 4       4  4 0.0000000 10.0
	# Build a small example data set: an id column to group on, plus two
	# numeric columns to summarise.
	df <- data.frame(
	  id   = c(1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4),
	  var1 = c(-2.1, 1.1, 3.14, 2.17828, 9, 10, 8, 2, 1, 0, -1),
	  var2 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
	)

	# Printing df at the interactive prompt (the REPL, short for
	# Read-Evaluate-Print Loop) shows:
	# > df
	#    id     var1 var2
	# 1   1 -2.10000    1
	# 2   1  1.10000    2
	# 3   1  3.14000    3
	# 4   2  2.17828    4
	# 5   2  9.00000    5
	# 6   3 10.00000    6
	# 7   3  8.00000    7
	# 8   3  2.00000    8
	# 9   4  1.00000    9
	# 10  4  0.00000   10
	# 11  4 -1.00000   11

	# Per-group means come from aggregate():
	#   * by = list(df$id) forms one group per distinct id value;
	#   * FUN = mean is applied to every column of df within each group.
	means <- aggregate(x = df, by = list(df$id), FUN = mean)

	# Because the whole of df is passed in, the id column is averaged as well.
	# The result therefore holds the grouping key (Group.1) next to the
	# per-group mean of id, which equals that same key:
	# > means
	#   Group.1 id      var1 var2
	# 1       1  1 0.7133333  2.0
	# 2       2  2 5.5891400  4.5
	# 3       3  3 6.6666667  7.0
	# 4       4  4 0.0000000 10.0