Created
August 8, 2013 10:47
-
-
Save aaronsaunders/6183641 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Steve Pittard - [email protected], 03/19/12 | |
# Code to illustrate motivations for using apply function | |
# | |
# See www.bimcore.emory.edu/bbseries for slides and code downloads | |
# | |
# References include: | |
# http://statland.org/R/R/Rpulse2.htm , http://www.cyclismo.org/tutorial/R/tables.html#manipulations | |
# http://nsaunders.wordpress.com/2010/08/20/a-brief-introduction-to-apply-in-r/ | |
# | |
data(mtcars) | |
?mtcars | |
by(mtcars$mpg, list(am=mtcars$am, cyl=mtcars$cyl), mean) | |
by(mtcars[, c('hp','wt')], list(am=mtcars$am), mean) | |
by(mtcars['mpg'], list(am=mtcars$am, cyl=mtcars$cyl), mean) | |
tapply(mtcars$mpg, mtcars$am,mean) | |
tapply(mtcars$mpg, list(mtcars$am,mtcars$cyl), mean) | |
### | |
# Let's talk about aggregation now that we know something about the apply functions | |
### | |
# Simple tables | |
letters[1:10] # Built in letters char vector | |
my.sample <- sample(letters[1:10], 20, replace=TRUE) # sample 20 times from the 1st 10 letters with replacement | |
table(my.sample) # Counts frequency of letters | |
# The table is much simpler than writing a for loop | |
# We will now use the internal mtcars data set - assumes some factors are present | |
my.mtcars <- transform(mtcars, am=factor(am, labels=c("auto","manual"))) | |
my.table <- table(my.mtcars$cyl,my.mtcars$am) | |
# We can even add in another group - Let's try number of gears | |
my.3d.table <- table(my.mtcars$cyl, my.mtcars$am, my.mtcars$gear) | |
my.3d.table | |
# Here we create some categories on the fly using cut | |
with(my.mtcars, table(cut(mpg, quantile(mpg)), cyl)) | |
with(my.mtcars, table(cut(mpg, quantile(mpg), labels=c("Big Gas Hog","Gas Hog","Not Bad","The Best")), | |
cyl)) | |
# The xtabs function also summarizes things. It uses a formula notation | |
xtabs(~SNP1 + SNP2, new.snps) | |
xtabs(~am + cyl, my.mtcars) | |
xtabs(~am + cyl + gear, my.mtcars) | |
class(my.table) | |
my.table | |
# Tables are easily visualized | |
barplot(my.table, legend=TRUE, col=c("red","blue","green"), ylim=c(0,20)) | |
plot(my.table, col=c("red","blue","green"), main="Mosaic Plot") | |
# You can see the margin totals | |
addmargins(my.table) | |
addmargins(my.table, 1) | |
addmargins(my.table, 2) | |
# Working with table proportions | |
addmargins(prop.table(my.table)) | |
addmargins(prop.table(my.table, 2)) # Look at proportions columnwise | |
addmargins(prop.table(my.table, 1)) # Look at proportions row wise | |
## | |
# Let's explore the aggregate function | |
## | |
# The aggregate function in this case is "mean" but could be something else | |
aggregate(mtcars['mpg'], list(Transmission=mtcars$am),mean) | |
aggregate(mtcars[c('mpg','hp')], list(Transmission_Type=mtcars$am), mean) | |
aggregate(mtcars[c('mpg','hp')], list(Transmission_Type=mtcars$am, | |
Cylinders=mtcars$cyl), mean) | |
aggregate(mpg ~ am, my.mtcars, mean) | |
# Do the mean aggregation of MPG by Transmission and cylinder type | |
aggregate(mpg~am+cyl, my.mtcars, mean) | |
# Do the mean aggregation of MPG & weight by transmission and cylinder type | |
aggregate(cbind(mpg,wt,hp) ~ am+cyl, my.mtcars, mean) | |
# Aggregate has an alternative calling sequence | |
aggregate(my.mtcars[c('mpg','wt','hp')], | |
by=list(am=my.mtcars$am, cyl=my.mtcars$cyl), mean) | |
# The Reshape package | |
library(reshape) | |
my.melt <- melt(my.mtcars, id.vars=c("cyl","am")) | |
my.melt | |
cast(my.melt, cyl ~ variable, mean) | |
cast(my.melt, cyl ~ variable| am, mean,subset=variable %in% c("hp","wt")) | |
# Let's work with some SNP data | |
cast(melt(new.snps, id.vars=c("SNP1","SNP2","co")), SNP1+SNP2+co ~ variable,mean) | |
aggregate(new.snps[c('X1','X2')], | |
by=list(SNP1=new.snps$SNP1, SNP2=new.snps$SNP2, CO=new.snps$co), mean) | |
# But we can provide our own function as an argument. | |
# In this case the mean and standard deviation | |
myFunction <- function(x) {mx=mean(x);sdx=sd(x);mz=c(mean=mx,sd=sdx)} | |
aggregate(new.snps[c('X1','X2')], by=list(SNP1=my.snps$SNP1,SNP2=my.snps$SNP2), myFunction ) | |
# What would the reshape solution look like ? | |
cast(melt(new.snps,id.vars=c("SNP1","SNP2","co")),SNP1 +SNP2 ~ variable, myFunction) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
by(mtcars[, c('hp','wt')], list(am=mtcars$am), mean)
doesn't work.
by(mtcars[, c('hp','wt')], list(am=mtcars$am), colMeans)
might be what was intended.