Last active
September 3, 2015 13:18
-
-
Save trinker/4d43800e73c3a678b374 to your computer and use it in GitHub Desktop.
Plot Pies on a Map
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
load(url("http://dl.dropbox.com/u/61803503/nycounty.RData")) | |
head(ny); head(key) #view the data set from my drop box | |
if (!require("pacman")) install.packages("pacman") | |
p_install_version("ggtree", '1.0.14') | |
p_load(ggplot2, ggtree, dplyr, tidyr, sp, maps, pipeR, grid, XML, qdapRegex, magrittr) | |
getLabelPoint <- function(county) {Polygon(county[c('long', 'lat')])@labpt} | |
df <- map_data('county', 'new york') # NY region county data | |
centroids <- by(df, df$subregion, getLabelPoint) # Returns list | |
centroids <- do.call("rbind.data.frame", centroids) # Convert to Data Frame | |
names(centroids) <- c('long', 'lat') # Appropriate Header | |
pops <- "http://data.newsday.com/long-island/data/census/county-population-estimates-2012/" %>% | |
readHTMLTable(which=1) %>% | |
tbl_df() %>% | |
select(1:2) %>% | |
setNames(c("region", "population")) %>% | |
mutate( | |
population = {as.numeric(gsub("\\D", "", population))}, | |
region = tolower(gsub("\\s+[Cc]ounty|\\.", "", region)), | |
weight = exp(population/sum(population)) | |
) | |
race_data_long <- add_rownames(centroids, "region") %>>% | |
left_join({distinct(select(ny, region:other))}) %>>% | |
left_join(pops) %>>% | |
(~ race_data) %>>% | |
gather(race, prop, white:other) %>% | |
split(., .$region) | |
pies <- setNames(lapply(1:length(race_data_long), function(i){ | |
ggplot(race_data_long[[i]], aes(x=1, prop, fill=race)) + | |
geom_bar(stat="identity", width=1) + | |
coord_polar(theta="y") + | |
theme_tree() + | |
xlab(NULL) + | |
ylab(NULL) + | |
theme_transparent()# + | |
#theme(plot.margin=unit(c(0,0,0,0),"mm")) | |
}), names(race_data_long)) | |
p <- ggplot(ny, aes(long, lat, group=group)) + | |
geom_polygon(colour='black', fill=NA) | |
print(p) | |
for (i in 1:nrow(race_data)) { | |
nms <- names(race_data_long)[i] | |
dat <- race_data[race_data$region == nms, ] | |
p %<>% subview(pies[[i]], dat[["lat"]], dat[["long"]], dat[["weight"]], dat[["weight"]]) | |
print(p) | |
} | |
For one the height and width must be between 0-1 here is the gist with 0/1 bounds:
load(url("http://dl.dropbox.com/u/61803503/nycounty.RData"))
head(ny); head(key) #view the data set from my drop box
if (!require("pacman")) install.packages("pacman")
p_install_version("ggtree", '1.0.14')
p_load(ggplot2, ggtree, dplyr, tidyr, sp, maps, pipeR, grid, XML, qdapRegex, magrittr)
getLabelPoint <- function(county) {Polygon(county[c('long', 'lat')])@labpt}
df <- map_data('county', 'new york') # NY region county data
centroids <- by(df, df$subregion, getLabelPoint) # Returns list
centroids <- do.call("rbind.data.frame", centroids) # Convert to Data Frame
names(centroids) <- c('long', 'lat') # Appropriate Header
pops <- "http://data.newsday.com/long-island/data/census/county-population-estimates-2012/" %>%
readHTMLTable(which=1) %>%
tbl_df() %>%
select(1:2) %>%
setNames(c("region", "population")) %>%
mutate(
population = {as.numeric(gsub("\\D", "", population))},
region = tolower(gsub("\\s+[Cc]ounty|\\.", "", region)),
weight = ((1 - (1/(1 + exp(population/sum(population))))))
)
race_data_long <- add_rownames(centroids, "region") %>>%
left_join({distinct(select(ny, region:other))}) %>>%
left_join(pops) %>>%
(~ race_data) %>>%
gather(race, prop, white:other) %>%
split(., .$region)
pies <- setNames(lapply(1:length(race_data_long), function(i){
ggplot(race_data_long[[i]], aes(x=1, prop, fill=race)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta="y") +
theme_tree() +
xlab(NULL) +
ylab(NULL) +
theme_transparent() +
theme(plot.margin=unit(c(0,0,0,0),"mm"))
}), names(race_data_long))
p <- ggplot(ny, aes(long, lat, group=group)) +
geom_polygon(colour='black', fill=NA)
print(p)
for (i in 1:nrow(race_data)) {
nms <- names(race_data_long)[i]
dat <- race_data[race_data$region == nms, ]
p %<>% subview(pies[[i]], dat[["lat"]], dat[["long"]], dat[["weight"]], dat[["weight"]])
print(p)
}
I replied in my blog comment that in x-y order is reverse, and you still not fix it.
p %<>% subview(pies[[i]], dat[["lat"]], dat[["long"]], dat[["weight"]], dat[["weight"]])
should be:
p %<>% subview(pies[[i]], dat[["long"]], dat[["lat"]], dat[["weight"]], dat[["weight"]])
Your data still have some issues.
- nrow(race_data) is 63 while pies only has 62.
- some data contains more than one records.
> i <- 31
> nms <- names(race_data_long)[i]
> dat <- race_data[race_data$region == nms, ]
> dat[["long"]]
[1] -73.97427 -73.97427
i in 1:30
is fine. I run your script with this range, and also change dat[["weight"]]
to dat[["weight"]]/10
.
Still getting an issue with size and a warning that I believe to be related. I captured in a video. The subplot renders as it did in the original plot of the pie, not scaled down and in the correct location: https://youtu.be/GDoVgpZ-7TQ
Below is the warning message and the code I used:
Warning messages:
1: In min(x, na.rm = na.rm) :
no non-missing arguments to min; returning Inf
2: In max(x, na.rm = na.rm) :
no non-missing arguments to max; returning -Inf
3: In min(x, na.rm = na.rm) :
no non-missing arguments to min; returning Inf
4: In max(x, na.rm = na.rm) :
no non-missing arguments to max; returning -Inf
load(url("http://dl.dropbox.com/u/61803503/nycounty.RData"))
head(ny); head(key) #view the data set from my drop box
if (!require("pacman")) install.packages("pacman")
p_install_version("ggtree", '1.0.14')
p_load(ggplot2, ggtree, dplyr, tidyr, sp, maps, pipeR, grid, XML, qdapRegex, magrittr)
getLabelPoint <- function(county) {Polygon(county[c('long', 'lat')])@labpt}
df <- map_data('county', 'new york') # NY region county data
centroids <- by(df, df$subregion, getLabelPoint) # Returns list
centroids <- do.call("rbind.data.frame", centroids) # Convert to Data Frame
names(centroids) <- c('long', 'lat') # Appropriate Header
pops <- "http://data.newsday.com/long-island/data/census/county-population-estimates-2012/" %>%
readHTMLTable(which=1) %>%
tbl_df() %>%
select(1:2) %>%
setNames(c("region", "population")) %>%
mutate(
population = {as.numeric(gsub("\\D", "", population))},
region = tolower(gsub("\\s+[Cc]ounty|\\.", "", region)),
weight = ((1 - (1/(1 + exp(population/sum(population))))))
)
race_data_long <- add_rownames(centroids, "region") %>>%
left_join({distinct(select(ny, region:other))}) %>>%
left_join(pops) %>>%
(~ race_data) %>>%
gather(race, prop, white:other) %>%
split(., .$region)
pies <- setNames(lapply(1:length(race_data_long), function(i){
ggplot(race_data_long[[i]], aes(x=1, prop, fill=race)) +
geom_bar(stat="identity", width=1) +
coord_polar(theta="y") +
theme_tree() +
xlab(NULL) +
ylab(NULL) +
theme_transparent() #+
#theme(plot.margin=unit(c(0,0,0,0),"mm"))
}), names(race_data_long))
p <- ggplot(ny, aes(long, lat, group=group)) +
geom_polygon(colour='black', fill=NA)
print(p)
#n <- nrow(race_data)
#n <- 30
n <- 1
for (i in 1:n) {
nms <- names(race_data_long)[i]
dat <- race_data[race_data$region == nms, ]
p <- subview(p, pies[[i]], x=unlist(dat[["long"]])[1], y=unlist(dat[["lat"]])[1])
print(p)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is an attempt to solve a long standing problem I have with mapping glyphs on a map: http://stackoverflow.com/questions/10368180/plotting-pie-graphs-on-map-in-ggplot
I saw this post: http://ygc.name/2015/08/31/subview/ and was excited as I think this can solve the problem with ease.