-
-
Save hadley/233134 to your computer and use it in GitHub Desktop.
| # Bubble-map variant: one point per county, placed at the centre of the | |
| # county's bounding box. Uses county_df, unemp and state_df, which are | |
| # created by the choropleth script, so run that script first. | |
| mid_range <- function(x) mean(range(x, na.rm = TRUE)) | |
| # Base aggregate() is used instead of plyr's ddply()/summarise() because no | |
| # library(plyr) call is made; the result has columns state, county, lat, long. | |
| centres <- aggregate(cbind(lat, long) ~ state + county, data = county_df, | |
| FUN = mid_range) | |
| bubbles <- merge(centres, unemp, by = c("state", "county")) | |
| # rate_d is only added to choropleth, so bin the rate here too (same breaks). | |
| bubbles$rate_d <- cut(bubbles$rate, breaks = c(seq(0, 10, by = 2), 35)) | |
| # Plot 1: point size encodes the unemployment rate. | |
| ggplot(bubbles, aes(long, lat)) + | |
| geom_polygon(aes(group = group), data = state_df, | |
| colour = "white", fill = NA) + | |
| geom_point(aes(size = rate), alpha = 1/2) + | |
| scale_size(range = c(0.5, 3), breaks = c(5, 10, 20, 30)) | |
| # Plot 2: point colour encodes the binned unemployment rate. | |
| ggplot(bubbles, aes(long, lat)) + | |
| geom_polygon(aes(group = group), data = state_df, | |
| colour = "white", fill = NA) + | |
| geom_point(aes(color = rate_d)) + | |
| scale_colour_brewer(palette = "PuRd") |
| # County-level unemployment choropleth: joins the rates read from | |
| # unemployment09.csv to county polygons from the maps package. | |
| library(ggplot2) | |
| library(maps) | |
| # First (and most annoying) task - get matching state and county variables | |
| # for both datasets. And unfortunately it's not quite right, as you can | |
| # see from the finished product - some counties are missing. | |
| unemp <- read.csv("unemployment09.csv", header = FALSE, | |
| stringsAsFactors = FALSE) | |
| names(unemp) <- c("id", "state_fips", "county_fips", "name", "year", | |
| "?", "?", "?", "rate") | |
| # Strip the " County, XX" suffix from the name and lower-case it so it can be | |
| # matched against the county column of the map data. | |
| unemp$county <- tolower(gsub(" County, [A-Z]{2}", "", unemp$name)) | |
| # Keep only the two-letter state abbreviation found in the name. | |
| unemp$state <- gsub("^.*([A-Z]{2}).*$", "\\1", unemp$name) | |
| county_df <- map_data("county") | |
| names(county_df) <- c("long", "lat", "group", "order", "state_name", "county") | |
| # Translate lower-case full state names into two-letter abbreviations. | |
| county_df$state <- state.abb[match(county_df$state_name, tolower(state.name))] | |
| county_df$state_name <- NULL | |
| state_df <- map_data("state") | |
| # Combine together | |
| choropleth <- merge(county_df, unemp, by = c("state", "county")) | |
| # merge() reorders rows; restore the vertex order so polygons draw correctly. | |
| choropleth <- choropleth[order(choropleth$order), ] | |
| # Discretise rate to use with Brewer colour scheme - many options here | |
| # choropleth$rate_d <- cut_number(choropleth$rate, 5) | |
| # choropleth$rate_d <- cut_interval(choropleth$rate, 5) | |
| # Nathan's choice is a little odd: | |
| choropleth$rate_d <- cut(choropleth$rate, breaks = c(seq(0, 10, by = 2), 35)) | |
| # Once you have the data in the right format, recreating the plot is | |
| # straightforward. | |
| ggplot(choropleth, aes(long, lat, group = group)) + | |
| geom_polygon(aes(fill = rate_d), colour = alpha("white", 1/2), size = 0.2) + | |
| geom_polygon(data = state_df, colour = "white", fill = NA) + | |
| scale_fill_brewer(palette = "PuRd") | |
| # Takes a while to draw because ggplot2 not very efficient with large numbers | |
| # of polygons :( |
This was a very useful comment. Thanks!
@hadley: Thx for the code; I'm very new to maps and right at the moment don't find the time to dive in properly with a book, so working examples are a real time saver.
I had trouble running the code, but after making the following changes it worked:
add library(scales) and replace pal = "PuRd" with palette = "PuRd", as suggested by shawneeunion above.
Thanks Professor Hadley, for the template solution. I get the following error when I finally run the ggplot call; until then there is no error. Thanks for your help.
ggplot(choropleth, aes(long, lat, group = group)) +
geom_polygon(aes(fill = rate_d), colour = alpha("white", 1/2), size = 0.2) +geom_polygon(data = state_df, colour = "white", fill = NA) +scale_fill_brewer(pal = "PuRd")
Error in f(..., self = self) : attempt to apply non-function
R.Version()
$platform
[1] "x86_64-w64-mingw32"
$version.string
[1] "R version 3.2.3 (2015-12-10)"
On Windows, the slowness seems to be caused by alpha(). Changing this line
colour = alpha("white", 1/2)
to the following
colour = "gray85"
makes the plot draw almost instantaneously with only a slight change in the county border color.
I had trouble getting the scale_fill_brewer to work. Using "palette" rather than "pal" made the code run.