Created
June 30, 2016 15:04
-
-
Save arthurwuhoo/5399860065a1961e27ac38c91d39d3f2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ===================================================================================================================== | |
# VISUALISATION EXERCISES | |
# ===================================================================================================================== | |
# We'll be using the baseball data from the corrgram package. | |
# Exercise 1 ---------------------------------------------------------------------------------------------------------- | |
# 1. Install corrgram if necessary. | |
# 2. Load the library. | |
# 3. Get acquainted with the baseball data. | |
# 4. Make a scatter plot showing logSal against Years. | |
# - Label axes appropriately. Add a plot title. | |
# - Use the col parameter to add a splash of colour. | |
# - Change the plotting symbol. Search for "pch" on ?par. | |
# 5. Generate a pairs plot for the variables `Atbat`, `Hits`, `Homer`, `Runs`, `RBI`, `Walks` and `Years`. | |
# - Which of these features are most obviously correlated? | |
# Exercise 2 ---------------------------------------------------------------------------------------------------------- | |
# 1. Make a box plot of logSal versus Position. Conclusions? | |
# 2. Make a mosaic plot which shows the distribution of Position versus Team. | |
# - See ?spineplot for details of the plot() method. | |
# - Create a mosaic plot for Position versus League. | |
# Exercise 3 ---------------------------------------------------------------------------------------------------------- | |
# 1. Create a histogram for logSal. | |
# - Change the bin width to 0.25. | |
# - Use a colour to fill the histogram. | |
# - Convert from counts to density. | |
# Exercise 4 ---------------------------------------------------------------------------------------------------------- | |
# Load ggplot2 and use it for the following exercises. | |
# 1. Make a scatter plot showing logSal against Years. | |
# - Colour points by Position. Would this be possible with base graphics? Would it be easy? | |
# - Deal with overplotting. | |
# - Change the plot theme. I like the classic theme, but you should find something that works for you. | |
# - Install ggthemes and explore a wider range of themes. | |
# - Add a smoothed curve to the data. | |
# - Try a different color palette. See http://colorbrewer2.org/. | |
# - Add a 2D density estimate. | |
# Exercise 5 ---------------------------------------------------------------------------------------------------------- | |
# 1. Make a histogram of logSal. | |
# - Sort out any warning messages. | |
# - Create overlays for each Position. | |
# - Split the plot into facets. | |
# - Add appropriate labels for axes. Add a plot title.
# 2. Create a smoothed density estimate with the same data. | |
# - Create overlays for each Position. | |
# Exercise 6 ---------------------------------------------------------------------------------------------------------- | |
# Do something awesome. | |
# ===================================================================================================================== | |
# VISUALISATION SOLUTIONS | |
# ===================================================================================================================== | |
# Exercise 1 ---------------------------------------------------------------------------------------------------------- | |
# Load corrgram, the package the exercises name as the source of the baseball
# data.
library(corrgram)

# Scatter plot of log-salary against years of play, with axis labels, a plot
# title, a colour and a solid plotting symbol (pch = 19).
plot(
  logSal ~ Years,
  data = baseball,
  main = "Salary versus Years of Play",
  xlab = "Years of Play",
  ylab = "Logarithm of Salary",
  col = "red",
  pch = 19
)

# Pairs plot of the variables listed in the exercise. Columns are selected by
# name so the selection does not depend on the column order of the data frame.
batting_vars <- c("Atbat", "Hits", "Homer", "Runs", "RBI", "Walks", "Years")
pairs(baseball[, batting_vars])
# Exercise 2 ---------------------------------------------------------------------------------------------------------- | |
# Box plot of log-salary for each fielding position.
boxplot(
  logSal ~ Position,
  data = baseball,
  xlab = "Position",
  ylab = "Logarithm of Salary"
)

# Position versus Team via the formula method of plot().
# NOTE(review): this yields a spine plot only when both columns are factors;
# confirm the column types in the installed corrgram version.
plot(Position ~ Team, data = baseball)

# Mosaic plot of Position versus League.
mosaicplot(
  League ~ Position,
  data = baseball,
  main = "Mosaic: Position versus League"
)
# Exercise 3 ---------------------------------------------------------------------------------------------------------- | |
# Histogram of log-salary, built up step by step.
log_salary <- baseball$logSal
salary_label <- "Logarithm of Salary"
salary_title <- "Distribution of Log Salary"

# Step 1: default bins.
hist(log_salary, main = salary_title, xlab = salary_label)

# Bin edges at multiples of the bin width that enclose the observed range.
# hist() stops with an error when a value lies outside the supplied breaks, so
# the edges are derived from the data instead of being hard-coded.
bin_width <- 0.25
salary_range <- range(log_salary, na.rm = TRUE)
salary_breaks <- seq(
  floor(salary_range[1] / bin_width) * bin_width,
  ceiling(salary_range[2] / bin_width) * bin_width,
  by = bin_width
)

# Step 2: bin width of 0.25.
hist(
  log_salary,
  breaks = salary_breaks,
  main = salary_title,
  xlab = salary_label
)

# Step 3: add a fill colour.
hist(
  log_salary,
  breaks = salary_breaks,
  col = "lightgreen",
  main = salary_title,
  xlab = salary_label
)

# Step 4: show density rather than counts.
hist(
  log_salary,
  breaks = salary_breaks,
  col = "lightgreen",
  freq = FALSE,
  main = salary_title,
  xlab = salary_label
)
# Exercise 4 ---------------------------------------------------------------------------------------------------------- | |
library(ggplot2)

# Shared starting point for every plot in this exercise: logSal against Years.
salary_by_years <- ggplot(baseball, aes(x = Years, y = logSal))

# Plain scatter plot.
salary_by_years +
  geom_point()

# Colour the points by Position.
salary_by_years +
  geom_point(aes(colour = Position))

# Deal with overplotting: semi-transparent, jittered points. The layer is
# stored once and reused in the plots below.
jittered_points <- geom_point(
  aes(colour = Position),
  alpha = 0.5,
  position = "jitter"
)

salary_by_years +
  jittered_points

# Change the plot theme.
salary_by_years +
  jittered_points +
  theme_classic()

# ggthemes supplies additional themes such as theme_wsj().
library(ggthemes)

salary_by_years +
  jittered_points +
  theme_wsj()

# Add a smoothed curve and switch to a ColorBrewer palette.
salary_by_years +
  jittered_points +
  geom_smooth() +
  scale_colour_brewer(palette = "Set1") +
  theme_classic()

# Add a 2D density estimate underneath the smoothed curve.
salary_by_years +
  jittered_points +
  geom_density_2d(colour = "darkgrey") +
  geom_smooth() +
  scale_colour_brewer(palette = "Set1") +
  theme_classic()
# Exercise 5 ---------------------------------------------------------------------------------------------------------- | |
# Shared starting point for every plot in this exercise: distribution of logSal.
salary_dist <- ggplot(baseball, aes(x = logSal))

# Plain histogram. An explicit binwidth avoids the default-bins message and
# na.rm = TRUE drops any rows with a missing logSal without a warning.
salary_dist +
  geom_histogram(binwidth = 0.125, na.rm = TRUE) +
  theme_classic()

# Stacked version: bars are filled by Position and stacked on top of each other.
salary_dist +
  geom_histogram(aes(fill = Position), binwidth = 0.125, na.rm = TRUE) +
  theme_classic()

# Overlaid version: position = "identity" draws each Position from the baseline;
# transparency keeps the overlapping bars visible.
salary_dist +
  geom_histogram(
    aes(fill = Position),
    binwidth = 0.125,
    alpha = 0.55,
    position = "identity",
    na.rm = TRUE
  ) +
  theme_classic()

# Faceted version: one panel per Position, with axis labels and a plot title.
salary_dist +
  geom_histogram(
    aes(fill = Position),
    binwidth = 0.125,
    alpha = 0.55,
    na.rm = TRUE
  ) +
  facet_grid(Position ~ .) +
  labs(x = "Logarithm of Salary", y = "Count") +
  ggtitle("Salary Distribution by Position") +
  theme_classic()

# Smoothed density estimate, overlaid for each Position.
salary_dist +
  geom_density(
    aes(fill = Position),
    colour = "black",
    alpha = 0.5,
    na.rm = TRUE
  ) +
  scale_fill_brewer(palette = "Set1") +
  theme_classic()
# Exercise 6 ---------------------------------------------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment