Created
August 13, 2013 14:32
-
-
Save rpietro/6221674 to your computer and use it in GitHub Desktop.
Script from Hadley Wickham's book, with some comments and additions.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-time setup: uncomment to install ggplot2 (the package name is quoted).
# install.packages("ggplot2")
# library() stops with an error if ggplot2 is missing, whereas require()
# would only return FALSE and let the script fail later.
library(ggplot2)

# NOTE(review): qplot() is deprecated in recent ggplot2 releases; it is kept
# here because the script follows the book's qplot examples.

set.seed(1410) # Make the sample reproducible
# Random 100-row subset of diamonds, used by the smaller example plots below.
dsmall <- diamonds[sample(nrow(diamonds), 100), ]
head(dsmall)
?diamonds # Open the help page for the diamonds dataset

# Basic scatterplots on the full dataset: raw values, log-log scale, and
# carat against the product of the x, y and z columns.
qplot(carat, price, data = diamonds)
qplot(log(carat), log(price), data = diamonds)
qplot(carat, x * y * z, data = diamonds)
# Map point colour to diamond colour (first plot), and point shape to cut
# quality (second plot), using the 100-row dsmall sample.
qplot(carat, price, data = dsmall, colour = color)
qplot(carat, price, data = dsmall, shape = cut)
# Reducing the alpha value from 1/10 (first plot) to 1/100 (second) to 1/200
# (third) makes it possible to see where the bulk of the points lie.
# I() passes the value as a fixed setting instead of mapping it to a scale.
qplot(carat, price, data = diamonds, alpha = I(1 / 10))
qplot(carat, price, data = diamonds, alpha = I(1 / 100))
qplot(carat, price, data = diamonds, alpha = I(1 / 200))
# Smooth curves added to scatterplots of carat vs. price: the dsmall sample
# (first plot) and the full dataset (second plot).
qplot(carat, price, data = dsmall, geom = c("point", "smooth"))
qplot(carat, price, data = diamonds, geom = c("point", "smooth"))
# The effect of the span parameter on the smoother: span = 0.2 (first plot)
# and span = 1 (second plot).
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"), span = 0.2
)
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"), span = 1
)
# The effect of the formula parameter, using a generalised additive model
# as the smoother: formula = y ~ s(x), the default (first plot), and
# formula = y ~ s(x, bs = "cs") (second plot).
library(mgcv) # Loaded before the method = "gam" smoothers below
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"),
  method = "gam", formula = y ~ s(x)
)
qplot(
  carat, price,
  data = dsmall, geom = c("point", "smooth"),
  method = "gam", formula = y ~ s(x, bs = "cs")
)
# Using jittering (first plot) and boxplots (second plot) to investigate the
# distribution of price per carat, conditional on colour. As the colour
# improves (from left to right) the spread of values decreases, but there is
# little change in the centre of the distribution.
qplot(color, price / carat, data = diamonds, geom = "jitter")
qplot(color, price / carat, data = diamonds, geom = "boxplot")
# Varying the alpha level of the jittered points: 1/5, 1/50 and 1/200, in
# that order. As the opacity decreases we begin to see where the bulk of the
# data lies. However, the boxplot still does much better.
qplot(
  color, price / carat,
  data = diamonds, geom = "jitter", alpha = I(1 / 5)
)
qplot(
  color, price / carat,
  data = diamonds, geom = "jitter", alpha = I(1 / 50)
)
qplot(
  color, price / carat,
  data = diamonds, geom = "jitter", alpha = I(1 / 200)
)
# Displaying the distribution of diamond carat: geom = "histogram" (first
# plot) and geom = "density" (second plot).
qplot(carat, data = diamonds, geom = "histogram")
qplot(carat, data = diamonds, geom = "density")
# Varying the bin width on a histogram of carat reveals interesting
# patterns. Bin widths, in order: 1, 0.1 and 0.01 carats. xlim restricts the
# plots to diamonds between 0 and 3 carats.
qplot(
  carat,
  data = diamonds, geom = "histogram", binwidth = 1, xlim = c(0, 3)
)
qplot(
  carat,
  data = diamonds, geom = "histogram", binwidth = 0.1, xlim = c(0, 3)
)
qplot(
  carat,
  data = diamonds, geom = "histogram", binwidth = 0.01, xlim = c(0, 3)
)
# Mapping a categorical variable to an aesthetic automatically splits up
# the geom by that variable: density plots are overlaid (first plot) and
# histograms are stacked (second plot).
qplot(carat, data = diamonds, geom = "density", colour = color)
qplot(carat, data = diamonds, geom = "histogram", fill = color)
# Bar charts of diamond colour. The first plot shows counts; the second is
# weighted by weight = carat to show the total weight of diamonds of each
# colour, with the y axis relabelled "carat" to match.
qplot(color, data = diamonds, geom = "bar")
qplot(color, data = diamonds, geom = "bar", weight = carat) +
  scale_y_continuous("carat")
# Two time series measuring amount of unemployment, from the economics
# dataset: the share of the population that is unemployed (first plot) and
# the median number of weeks unemployed (second plot). Both use
# geom = "line".
qplot(date, unemploy / pop, data = economics, geom = "line")
qplot(date, uempmed, data = economics, geom = "line")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment