Skip to content

Instantly share code, notes, and snippets.

@wch
Created December 7, 2012 20:42
Show Gist options
  • Select an option

  • Save wch/4236334 to your computer and use it in GitHub Desktop.

Select an option

Save wch/4236334 to your computer and use it in GitHub Desktop.
R Workshop
Getting started
===============
# Make sure every package this workshop loads is installed: ggplot2 and plyr
# are attached here, and gcookbook is attached in the wide-vs-long section.
# The argument is spelled out in full (dependencies = TRUE) instead of relying
# on partial matching and the reassignable shorthand T.
install.packages(c("ggplot2", "plyr", "gcookbook"), dependencies = TRUE)
# Attach the plotting and data-manipulation packages.
library(ggplot2)
library(plyr)
Basic examples
==============
# Print the built-in faithful data set in full, then its first rows, then its
# structure.
faithful
head(faithful)
str(faithful)
## Scatter plot
# Fully spelled-out call: the data and mapping arguments are both named.
ggplot(data = faithful, mapping = aes(x = eruptions, y = waiting)) +
  geom_point()
# More concisely, passing data and mapping by position:
ggplot(faithful, aes(x = eruptions, y = waiting)) +
  geom_point()
# The same variables through the qplot() shorthand:
qplot(eruptions, waiting, data = faithful)
## Histogram
# First with the default binning, then with an explicit bin width of 0.25.
ggplot(faithful, aes(x = eruptions)) +
  geom_histogram()
ggplot(faithful, aes(x = eruptions)) +
  geom_histogram(binwidth = 0.25)
## Points and lines
# Print the pressure data set and its structure.
pressure
str(pressure)
# Base plot reused by every layer below: temperature on x, pressure on y.
p <- ggplot(pressure, aes(x = temperature, y = pressure))
# Points only
p +
  geom_point()
# Lines only
p +
  geom_line()
# Lines first, then points drawn on top of them
p +
  geom_line() +
  geom_point()
# Bar graph (with continuous x axis)
p +
  geom_bar(stat = "identity")
# Two kinds of variables: continuous/numeric vs. discrete/categorical
# ggplot2 treats them very differently!
# Compare the column types reported for three data sets.
str(mtcars)
str(pressure)
str(PlantGrowth)
# View the dataset
PlantGrowth
head(PlantGrowth)
str(PlantGrowth)
# Points: weight plotted for each group
ggplot(PlantGrowth, aes(x = group, y = weight)) + geom_point()
# Box plot
ggplot(PlantGrowth, aes(x = group, y = weight)) + geom_boxplot()
# Violin plot
ggplot(PlantGrowth, aes(x = group, y = weight)) + geom_violin()
Understanding ggplot2
=====================
# Example data 1: three numeric columns, four rows
dat <- data.frame(
  var1 = c(2, 3, 5, 7),
  var2 = c(2, 4, 8, 5),
  var3 = c(5, 0, 4, 1)
)
# Example data 2: var1 and var2 hold text labels, var3 is numeric; six rows
dat2 <- data.frame(
  var1 = c("A", "B", "A", "B", "A", "B"),
  var2 = c("G1", "G0", "G2", "G1", "G0", "G2"),
  var3 = c(5, 0, 4, 1, 6, 3)
)
# Mapping data to aesthetics: columns are named inside aes()
ggplot(dat, aes(x = var1, y = var2)) +
  geom_point()
ggplot(dat, aes(x = var1, y = var2, colour = var3)) +
  geom_point()
# Setting aesthetics: fixed values are passed to the geom, outside aes()
ggplot(dat, aes(x = var1, y = var2)) +
  geom_point(colour = "red")
ggplot(dat, aes(x = var1, y = var2)) +
  geom_point(colour = "red", size = 6)
# Different geoms on the same data and mapping
ggplot(dat, aes(x = var1, y = var2)) +
  geom_point()
ggplot(dat, aes(x = var1, y = var2)) +
  geom_line()
ggplot(dat, aes(x = var1, y = var2)) +
  geom_bar(stat = "identity")
# Multiple geoms layered in one plot
ggplot(dat, aes(x = var1, y = var2)) +
  geom_point() +
  geom_line()
# Equivalent to: the mapping repeated in each layer
ggplot(dat) +
  geom_point(aes(x = var1, y = var2)) +
  geom_line(aes(x = var1, y = var2))
# ...and to: both the mapping and the data supplied in each layer
ggplot() +
  geom_point(aes(x = var1, y = var2), data = dat) +
  geom_line(aes(x = var1, y = var2), data = dat)
# Mapping discrete variables
ggplot(dat2, aes(x = var1, y = var3)) +
  geom_point()
ggplot(dat2, aes(x = var1, y = var3, colour = var2)) +
  geom_point()
# Facets: var2 shown as colour first, then used to split the plot into panels
ggplot(dat2, aes(x = var1, y = var3, colour = var2)) +
  geom_point()
ggplot(dat2, aes(x = var1, y = var3)) +
  geom_point() +
  facet_wrap(~ var2)
ggplot(dat2, aes(x = var1, y = var3)) +
  geom_point() +
  facet_grid(. ~ var2)
ggplot(dat2, aes(x = var1, y = var3)) +
  geom_point() +
  facet_grid(var2 ~ .)
ggplot(dat2, aes(x = var1, y = var3)) +
  geom_point() +
  facet_grid(var1 ~ var2)
# Stats
ggplot(dat2, aes(x = var1, y = var3)) +
  geom_point()
# Equivalent to: naming the identity stat on the geom, or the point geom on
# the stat
ggplot(dat2, aes(x = var1, y = var3)) +
  geom_point(stat = "identity")
ggplot(dat2, aes(x = var1, y = var3)) +
  stat_identity(geom = "point")
ggplot(dat2, aes(x = var1, y = var3)) +
  geom_point()
More advanced graphs
=======================
## Scatter plot with regression lines
# Base scatter plot of mpg against wt from mtcars, reused below
p <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point()
# Smoother with default settings
p +
  geom_smooth()
# Smoother using lm as the method, with se switched off
p +
  geom_smooth(method = lm, se = FALSE)
## Box plots
# The same mapping drawn as raw points, then as box plots
ggplot(ToothGrowth, aes(x = supp, y = len)) +
  geom_point()
ggplot(ToothGrowth, aes(x = supp, y = len)) +
  geom_boxplot()
## Position adjustments
# Box plots with a second variable mapped to fill (dose, then supp)
ggplot(ToothGrowth, aes(x = supp, y = len, fill = factor(dose))) +
  geom_boxplot()
ggplot(ToothGrowth, aes(x = factor(dose), y = len, fill = supp)) +
  geom_boxplot()
# Bars: the same base plot drawn with "dodge" and then "stack" positions
p <- ggplot(mtcars, aes(x = factor(cyl), fill = factor(am)))
p +
  geom_bar(position = "dodge", colour = "black")
p +
  geom_bar(position = "stack", colour = "black")
# Points: no adjustment, jitter named as a string, jitter as a position
# object, and finally jitter with explicit width and height
p <- ggplot(mpg, aes(x = displ, y = hwy))
p +
  geom_point()
p +
  geom_point(position = "jitter")
p +
  geom_point(position = position_jitter())
p +
  geom_point(position = position_jitter(width = 0.2, height = 0))
## Saving
# Three ways to write a plot to disk; each one targets the same scatter.png.
# 1. Draw the plot, then call ggsave() with only a file name (with no plot
#    argument, ggsave() uses the most recently displayed plot).
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()
ggsave("scatter.png")
# 2. Keep the plot in a variable and hand it to ggsave() with a width and
#    height.
p <- ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()
ggsave("scatter.png", p, width = 4, height = 4)
# 3. Open a graphics device, print the plot into it, then close the device.
p <- ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()
png("scatter.png") # Or you can use pdf
print(p)
dev.off()
Wide vs. long data
==================
# Attach gcookbook, then print four objects by name so their layouts can be
# compared side by side.
# NOTE(review): the objects below are presumably example data sets shipped
# with gcookbook; nothing in this file defines them -- confirm.
library(gcookbook)
# Judging by the names, each pair is a wide layout and its long counterpart.
simpledat
simpledat_long
plum_wide
plum
Group-wise operations
=====================
# Keep only the ToothGrowth rows whose supp is "VC", then average their len
tg_vc <- subset(ToothGrowth, subset = supp == "VC")
mean(tg_vc$len)
# Or, with plyr: summarise() on the subset alone, then ddply() to repeat the
# summary for each value of supp
library(plyr)
summarise(tg_vc, mean_len = mean(len))
ddply(
  ToothGrowth, "supp", summarise,
  mean_len = mean(len)
)
# Split on multiple variables
ddply(
  ToothGrowth, c("supp", "dose"), summarise,
  mean_len = mean(len)
)
# Multiple output variables
ddply(
  ToothGrowth, c("supp", "dose"), summarise,
  mean = mean(len), sd = sd(len)
)
# Make a plot
# Store the per-supp/dose mean and sd of len, then draw the means as dodged,
# black-outlined bars filled by supp.
tg <- ddply(
  ToothGrowth, c("supp", "dose"), summarise,
  mean = mean(len), sd = sd(len)
)
ggplot(tg, aes(x = factor(dose), y = mean, fill = supp)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black")
Other resources
===============
# ggplot2 documentation: http://docs.ggplot2.org/current/
# Mailing list: https://groups.google.com/forum/?fromgroups#!forum/ggplot2
# Cookbook for R: http://wiki.stdout.org/rcookbook/
# Stackoverflow: http://stackoverflow.com/questions/tagged/r
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment