Created
March 28, 2019 22:30
-
-
Save arvi1000/abfca5bd21237ceb85e65db5df709e0c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# ---- Read, clean and reshape the data ----
# Everything is read as-is; row 1 of `dat_raw` is used below as the real
# column headings and rows 2-13 are kept as the data rows.
dat_raw <- read_csv('http://infographics.economist.com/databank/Economist_women-research.csv')

# drop stuff that's not the data
dat <- dat_raw[2:13, ]

# fix names: take the headings from row 1, lower-case them, collapse anything
# from a '%' onwards into 'inventors', and shorten 'science(s)' to 'sci'
names(dat) <-
  dat_raw[1, ] %>%
  t %>%
  unname %>%
  tolower %>%
  sub('%.*', 'inventors', .) %>%
  gsub('science(s|\\b)?', 'sci', .)

# convert to long format. melt() forever, by the way -- sorry, tidyverse.
# data.table::melt() is only guaranteed to dispatch on data.table input, so
# convert explicitly instead of relying on the deprecated redirect to reshape2,
# and spell out `id.vars` rather than depending on partial argument matching.
dat <- data.table::melt(data.table::as.data.table(dat), id.vars = 'country')

# hand a plain data.frame to the dplyr code that follows
dat <- as.data.frame(dat)

# the melted values are still text; make them numeric
dat$value <- as.numeric(dat$value)
# ---- Derive the columns used for plotting ----
dat <-
  dat %>%
  mutate(
    # group countries: three are shown individually, every other country is
    # pooled under 'Others*'; the level order is fixed explicitly
    country2 =
      ifelse(country %in% c('Portugal', 'Japan', 'Brazil'),
             country, 'Others*') %>%
      factor(levels = rev(c('Japan', 'Others*', 'Brazil', 'Portugal')),
             ordered = TRUE),
    # field of study to factor
    variable = as.factor(variable)) %>%
  # add offset for same-value collisions: countries that share a value within
  # a field are numbered 0, 1, 2, ... (by country rank) so they can be nudged
  group_by(variable, value) %>%
  mutate(offset = dense_rank(country) - 1) %>%
  # drop the grouping so later verbs do not silently operate per group
  ungroup()
# Economist-style palette lookup, courtesy of ggthemes.
# Takes a colour name and returns the first matching `value` from the
# `fg` table of ggthemes' "economist" data (NA when nothing matches).
get_econ_clr <- function(clr_name) {
  econ_fg <- ggthemes::ggthemes_data[["economist"]]$fg
  matched <- filter(econ_fg, name == clr_name)
  matched$value[1]
}
# Named colour vector for scale_color_manual(): each country group is mapped
# to an Economist palette colour name, which is then resolved to its value.
plot_clrs <- vapply(
  c(Japan = 'pink',
    Portugal = 'dark blue',
    Brazil = 'light blue',
    `Others*` = 'gray'),
  get_econ_clr,
  character(1)
)
# make the plot. all the hard coded size values are cherry-picked/eyeballed
# for a specific plot window size

# The caption needs `other_countries`, which the script never defines; derive
# it (the countries pooled under 'Others*') unless something already set it.
if (!exists('other_countries')) {
  other_countries <- paste(
    sort(unique(as.character(dat$country[dat$country2 == 'Others*']))),
    collapse = ', ')
}

plot_dat <-
  dat %>%
  # make sure no leftover grouping interferes with the steps below
  ungroup() %>%
  # drop this category
  filter(variable != 'inventors') %>%
  # forget the dropped level so factor codes, axis breaks and labels agree
  mutate(variable = droplevels(variable))

# the fields actually plotted, in axis order
field_levels <- levels(plot_dat$variable)

# this weird aes(x) call is so dots will stack up: the field's factor code
# gives the row, and `offset` nudges points that share the same value
ggplot(plot_dat, aes(x = as.numeric(variable) + offset / 7, y = value)) +
  # points and a 50% reference line
  geom_point(size = 1.5, aes(color = country2)) +
  geom_hline(yintercept = .5) +
  # make it wide not tall
  coord_flip() +
  scale_color_manual(values = plot_clrs) +
  scale_x_continuous(breaks = seq_along(field_levels),
                     labels = field_levels,
                     expand = c(.05, .05)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1),
                     labels = scales::percent) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 3.5),
        panel.grid.minor.y = element_blank(),
        plot.caption = element_text(color = 'grey50')) +
  labs(title = "Share of published researchers who are women, by field & country",
       caption = paste0("*", other_countries),
       x = NULL, y = NULL, color = NULL)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Result: