Skip to content

Instantly share code, notes, and snippets.

@rpietro
Created August 13, 2013 14:32
Show Gist options
  • Save rpietro/6221674 to your computer and use it in GitHub Desktop.
Save rpietro/6221674 to your computer and use it in GitHub Desktop.
Script from Hadley Wickham's book, with some comments and additions.
# Setup: load ggplot2 and build a small reproducible sample of `diamonds`.
# Uncomment the next line if ggplot2 is not installed yet.
# install.packages("ggplot2")

# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail later on the first use of
# `diamonds` or `qplot()`.
library(ggplot2)

set.seed(1410) # Make the sample reproducible
# 100 randomly chosen rows of diamonds, used where the full data would overplot.
dsmall <- diamonds[sample(nrow(diamonds), 100), ]
head(dsmall)

# Open the help page describing the diamonds dataset.
?diamonds
# Basic scatterplots on the full diamonds data: price against carat, the same
# pair on log scales, and the product of the x, y and z columns against carat.
qplot(x = carat, y = price, data = diamonds)
qplot(x = log(carat), y = log(price), data = diamonds)
qplot(x = carat, y = x * y * z, data = diamonds)

# Extra aesthetics on the small sample: the first plot maps point colour to
# the diamond's color column, the second maps point shape to cut.
qplot(x = carat, y = price, data = dsmall, colour = color)
qplot(x = carat, y = price, data = dsmall, shape = cut)

# Overplotting on the full data: three plots with progressively lower alpha
# (1/10, 1/100, 1/200) to show where the bulk of the points lie. I() marks
# the alpha value as a fixed setting rather than a variable to map.
qplot(x = carat, y = price, data = diamonds, alpha = I(1 / 10))
qplot(x = carat, y = price, data = diamonds, alpha = I(1 / 100))
qplot(x = carat, y = price, data = diamonds, alpha = I(1 / 200))
# Smooth curves added to scatterplots of carat vs. price: first on the dsmall
# sample, then on the full dataset.
qplot(x = carat, y = price, data = dsmall, geom = c("point", "smooth"))
qplot(x = carat, y = price, data = diamonds, geom = c("point", "smooth"))

# Effect of the span parameter on the smoother: span = 0.2 in the first plot,
# span = 1 in the second.
qplot(
  x = carat, y = price, data = dsmall,
  geom = c("point", "smooth"),
  span = 0.2
)
qplot(
  x = carat, y = price, data = dsmall,
  geom = c("point", "smooth"),
  span = 1
)

# Effect of the formula parameter when a generalised additive model is the
# smoother: formula = y ~ s(x) in the first plot (described in the original
# comments as the default) and formula = y ~ s(x, bs = "cs") in the second.
# mgcv is loaded for the GAM smoother used below.
library(mgcv)
qplot(
  x = carat, y = price, data = dsmall,
  geom = c("point", "smooth"),
  method = "gam", formula = y ~ s(x)
)
qplot(
  x = carat, y = price, data = dsmall,
  geom = c("point", "smooth"),
  method = "gam", formula = y ~ s(x, bs = "cs")
)
# Distribution of price per carat conditional on colour, shown first with
# jittered points and then with boxplots. The original notes observe that the
# spread of values decreases as the colour improves, with little change in the
# centre of the distribution.
qplot(x = color, y = price / carat, data = diamonds, geom = "jitter")
qplot(x = color, y = price / carat, data = diamonds, geom = "boxplot")

# The jittered plot again at three alpha levels: 1/5, 1/50 and 1/200. Lower
# opacity starts to reveal where the bulk of the data lies, although the
# boxplot still does much better.
qplot(
  x = color, y = price / carat, data = diamonds,
  geom = "jitter", alpha = I(1 / 5)
)
qplot(
  x = color, y = price / carat, data = diamonds,
  geom = "jitter", alpha = I(1 / 50)
)
qplot(
  x = color, y = price / carat, data = diamonds,
  geom = "jitter", alpha = I(1 / 200)
)
# Distribution of carat: geom = "histogram" first, then geom = "density".
qplot(x = carat, data = diamonds, geom = "histogram")
qplot(x = carat, data = diamonds, geom = "density")

# Varying the histogram bin width reveals interesting patterns. Three plots
# with binwidths of 1, 0.1 and 0.01 carats; xlim restricts each one to
# diamonds between 0 and 3 carats.
qplot(
  x = carat, data = diamonds,
  geom = "histogram", binwidth = 1, xlim = c(0, 3)
)
qplot(
  x = carat, data = diamonds,
  geom = "histogram", binwidth = 0.1, xlim = c(0, 3)
)
qplot(
  x = carat, data = diamonds,
  geom = "histogram", binwidth = 0.01, xlim = c(0, 3)
)

# Mapping a categorical variable to an aesthetic automatically splits the
# geom by that variable: overlaid density curves coloured by color, then
# stacked histograms filled by color.
qplot(x = carat, data = diamonds, geom = "density", colour = color)
qplot(x = carat, data = diamonds, geom = "histogram", fill = color)
# Bar charts of diamond colour. The first plot shows counts; the second is
# weighted with weight = carat, so each bar shows the total weight of the
# diamonds of that colour, and the y axis is relabelled "carat" to match.
qplot(x = color, data = diamonds, geom = "bar")
qplot(x = color, data = diamonds, geom = "bar", weight = carat) +
  scale_y_continuous("carat")

# Two unemployment time series from the economics data, drawn with
# geom = "line": unemploy / pop (unemployed share of the population) and
# uempmed (described in the original comments as the median number of weeks
# unemployed).
qplot(x = date, y = unemploy / pop, data = economics, geom = "line")
qplot(x = date, y = uempmed, data = economics, geom = "line")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment