Created
November 12, 2009 18:16
-
-
Save hadley/233134 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Midpoint of the range of a numeric vector: the mean of its smallest and
# largest non-missing values.
#
# x: numeric vector; NA entries are ignored when finding the range.
# Returns a single number halfway between min(x) and max(x).
mid_range <- function(x) {
  mean(range(x, na.rm = TRUE))
}
# Approximate each county's centre as the midpoint of the latitude and
# longitude ranges of its outline points.
# NOTE(review): ddply() and summarise() are plyr functions and this snippet
# does not attach plyr itself - confirm the session has it loaded.
centres <- ddply(county_df, c("state", "county"), summarise,
  lat = mid_range(lat),
  long = mid_range(long)
)

# Attach the unemployment figures to the county centres.
bubbles <- merge(centres, unemp, by = c("state", "county"))

# The second plot colours by rate_d, but that column is only ever added to
# `choropleth`, never to `unemp`, so derive it here with the same breaks.
bubbles$rate_d <- cut(bubbles$rate, breaks = c(seq(0, 10, by = 2), 35))

# Bubble map: state outlines with one point per county, sized by rate.
# NOTE(review): scale_area(to = ) is the API of the ggplot2 release this was
# written against - confirm it still exists in the installed version.
ggplot(bubbles, aes(long, lat)) +
  geom_polygon(aes(group = group), data = state_df,
    colour = "white", fill = NA) +
  geom_point(aes(size = rate), alpha = 1/2) +
  scale_area(to = c(0.5, 3), breaks = c(5, 10, 20, 30))

# Same layout, with point colour showing the discretised rate instead.
# `palette` is spelled out in full rather than relying on partial argument
# matching of `pal`.
ggplot(bubbles, aes(long, lat)) +
  geom_polygon(aes(group = group), data = state_df,
    colour = "white", fill = NA) +
  geom_point(aes(colour = rate_d)) +
  scale_colour_brewer(palette = "PuRd")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2)
library(maps)

# First (and most annoying) task - get matching state and county variables
# for both datasets. Unfortunately it's not quite right, as you can see from
# the finished product - some counties are missing.
unemp <- read.csv("unemployment09.csv", header = FALSE,
  stringsAsFactors = FALSE)
names(unemp) <- c("id", "state_fips", "county_fips", "name", "year",
  "?", "?", "?", "rate")

# Build the join keys from `name`:
# - county: drop a " County, XX" suffix and lower-case the remainder
# - state: keep the last run of two consecutive capital letters
# Presumably names look like "Some County, ST"; names that do not fit that
# pattern are left unnormalised and will not match the map data.
unemp$county <- tolower(gsub(" County, [A-Z]{2}", "", unemp$name))
unemp$state <- gsub("^.*([A-Z]{2}).*$", "\\1", unemp$name)

# County outlines. Rename the columns so the keys match `unemp`, then swap
# the lower-case full state name for its two-letter abbreviation (names not
# found in state.name become NA).
county_df <- map_data("county")
names(county_df) <- c("long", "lat", "group", "order", "state_name", "county")
county_df$state <- state.abb[match(county_df$state_name, tolower(state.name))]
county_df$state_name <- NULL

# State outlines, drawn on top of the counties.
state_df <- map_data("state")
# Combine together
choropleth <- merge(county_df, unemp, by = c("state", "county"))
# merge() sorts its result by the key columns, so put the rows back in the
# sequence given by the `order` column (presumably the drawing order of the
# outline points - confirm against map_data()).
choropleth <- choropleth[order(choropleth$order), ]

# Discretise rate to use with Brewer colour scheme - many options here
# choropleth$rate_d <- cut_number(choropleth$rate, 5)
# choropleth$rate_d <- cut_interval(choropleth$rate, 5)
# Nathan's choice is a little odd: five bins of width 2 up to 10, then a
# single bin from 10 to 35.
choropleth$rate_d <- cut(choropleth$rate, breaks = c(seq(0, 10, by = 2), 35))
# Once you have the data in the right format, recreating the plot is
# straightforward: counties filled by discretised rate with thin,
# half-transparent white borders, then state outlines drawn over the top.
# `palette` is spelled out in full rather than relying on partial argument
# matching of `pal`.
ggplot(choropleth, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = rate_d), colour = alpha("white", 1/2), size = 0.2) +
  geom_polygon(data = state_df, colour = "white", fill = NA) +
  scale_fill_brewer(palette = "PuRd")
# Takes a while to draw because ggplot2 not very efficient with large numbers
# of polygons :(
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
On Windows, the slowness seems to be caused by alpha(). Changing this line
to the following
makes the plot draw almost instantaneously with only a slight change in the county border color.