Skip to content

Instantly share code, notes, and snippets.

@aaronsaunders
Created August 8, 2013 10:47
Show Gist options
  • Save aaronsaunders/6183641 to your computer and use it in GitHub Desktop.
Save aaronsaunders/6183641 to your computer and use it in GitHub Desktop.
#
# Steve Pittard - [email protected], 03/19/12
# Code to illustrate motivations for using apply function
#
# See www.bimcore.emory.edu/bbseries for slides and code downloads
#
# References include:
# http://statland.org/R/R/Rpulse2.htm , http://www.cyclismo.org/tutorial/R/tables.html#manipulations
# http://nsaunders.wordpress.com/2010/08/20/a-brief-introduction-to-apply-in-r/
#
data(mtcars)
?mtcars
by(mtcars$mpg, list(am=mtcars$am, cyl=mtcars$cyl), mean)
by(mtcars[, c('hp','wt')], list(am=mtcars$am), mean)
by(mtcars['mpg'], list(am=mtcars$am, cyl=mtcars$cyl), mean)
tapply(mtcars$mpg, mtcars$am,mean)
tapply(mtcars$mpg, list(mtcars$am,mtcars$cyl), mean)
###
# Let's talk about aggregation now that we know something about the apply functions
###
# Simple tables
letters[1:10] # Built in letters char vector
my.sample <- sample(letters[1:10], 20, replace=TRUE) # sample 20 times from the 1st 10 letters with replacement
table(my.sample) # Counts frequency of letters
# The table is much simpler than writing a for loop
# We will now use the internal mtcars data set - assumes some factors are present
my.mtcars <- transform(mtcars, am=factor(am, labels=c("auto","manual")))
my.table <- table(my.mtcars$cyl,my.mtcars$am)
# We can even add in another group - Let's try number of gears
my.3d.table <- table(my.mtcars$cyl, my.mtcars$am, my.mtcars$gear)
my.3d.table
# Here we create some categories on the fly using cut
with(my.mtcars, table(cut(mpg, quantile(mpg)), cyl))
with(my.mtcars, table(cut(mpg, quantile(mpg), labels=c("Big Gas Hog","Gas Hog","Not Bad","The Best")),
cyl))
# The xtabs function also summarizes things. It uses a formula notation
xtabs(~SNP1 + SNP2, new.snps)
xtabs(~am + cyl, my.mtcars)
xtabs(~am + cyl + gear, my.mtcars)
class(my.table)
my.table
# Tables are easily visualized
barplot(my.table, legend=TRUE, col=c("red","blue","green"), ylim=c(0,20))
plot(my.table, col=c("red","blue","green"), main="Mosaic Plot")
# You can see the margin totals
addmargins(my.table)
addmargins(my.table, 1)
addmargins(my.table, 2)
# Working with table proportions
addmargins(prop.table(my.table))
addmargins(prop.table(my.table, 2)) # Look at proportions columnwise
addmargins(prop.table(my.table, 1)) # Look at proportions row wise
##
# Let's explore the aggregate function
##
# The aggregate function in this case is "mean" but could be something else
aggregate(mtcars['mpg'], list(Transmission=mtcars$am),mean)
aggregate(mtcars[c('mpg','hp')], list(Transmission_Type=mtcars$am), mean)
aggregate(mtcars[c('mpg','hp')], list(Transmission_Type=mtcars$am,
Cylinders=mtcars$cyl), mean)
aggregate(mpg ~ am, my.mtcars, mean)
# Do the mean aggregation of MPG by Transmission and cylinder type
aggregate(mpg~am+cyl, my.mtcars, mean)
# Do the mean aggregation of MPG & weight by transmission and cylinder type
aggregate(cbind(mpg,wt,hp) ~ am+cyl, my.mtcars, mean)
# Aggregate has an alternative calling sequence
aggregate(my.mtcars[c('mpg','wt','hp')],
by=list(am=my.mtcars$am, cyl=my.mtcars$cyl), mean)
# The Reshape package
library(reshape)
my.melt <- melt(my.mtcars, id.vars=c("cyl","am"))
my.melt
cast(my.melt, cyl ~ variable, mean)
cast(my.melt, cyl ~ variable| am, mean,subset=variable %in% c("hp","wt"))
# Let's work with some SNP data
cast(melt(new.snps, id.vars=c("SNP1","SNP2","co")), SNP1+SNP2+co ~ variable,mean)
aggregate(new.snps[c('X1','X2')],
by=list(SNP1=new.snps$SNP1, SNP2=new.snps$SNP2, CO=new.snps$co), mean)
# But we can provide our own function as an argument.
# In this case the mean and standard deviation
myFunction <- function(x) {mx=mean(x);sdx=sd(x);mz=c(mean=mx,sd=sdx)}
aggregate(new.snps[c('X1','X2')], by=list(SNP1=my.snps$SNP1,SNP2=my.snps$SNP2), myFunction )
# What would the reshape solution look like ?
cast(melt(new.snps,id.vars=c("SNP1","SNP2","co")),SNP1 +SNP2 ~ variable, myFunction)
@ejanalysis
Copy link

by(mtcars[, c('hp','wt')], list(am=mtcars$am), mean)
doesn't work.
by(mtcars[, c('hp','wt')], list(am=mtcars$am), colMeans)
might be what was intended.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment