Created
October 28, 2019 00:41
-
-
Save eliocamp/b3fa57e87ae7c48df6ec2bdd43fe8121 to your computer and use it in GitHub Desktop.
Trends in gender-neutral names in the US
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(data.table) | |
library(magrittr) | |
# Fetch the state-level baby-name archive and extract it locally.
# From https://www.ssa.gov/oact/babynames/limits.html
file <- "https://www.ssa.gov/oact/babynames/state/namesbystate.zip"
destination <- "/home/elio/Downloads/babies.zip"

# The archive is binary, so request a binary transfer explicitly instead of
# relying on platform defaults. download.file() returns a status code
# (0 means success); stop here rather than trying to unzip a missing or
# partial file.
if (download.file(file, destination, mode = "wb") != 0) {
  stop("Could not download ", file, call. = FALSE)
}

dir <- "/home/elio/Downloads/babies"
# Create the extraction directory only when it is missing, so re-running the
# script does not emit an "already exists" warning.
if (!dir.exists(dir)) {
  dir.create(dir)
}
utils::unzip(destination, overwrite = TRUE, exdir = dir)
# Build one table of name counts per state and year.
#
# Steps: read every extracted .TXT file, stack them, name the five columns
# positionally, spread the counts into one column per sex (F and M, missing
# combinations filled with 0), and add `odds`.
#
# `odds` = 0.5 + |F / (M + F) - 0.5|, i.e. the share of the majority sex for
# that name: the chance of guessing the sex correctly from the name alone.
#
# `pattern` is a regular expression (not a shell glob), so the extension is
# matched with an escaped dot and anchored at the end of the file name. The
# directory is taken from `dir` instead of repeating the literal path.
bb_state <- list.files(dir, full.names = TRUE, pattern = "\\.TXT$") %>%
  lapply(fread) %>%
  rbindlist() %>%
  setnames(c("state", "sex", "year", "name", "n")) %>%
  dcast(state + year + name ~ sex, value.var = "n", fill = 0) %>%
  .[, odds := (0.5 + abs(F / (M + F) - 0.5))]
# National table of name counts per year, one column per sex (F and M, missing
# combinations filled with 0), plus `odds`.
#
# `odds` = 0.5 + |F / (M + F) - 0.5|, i.e. the share of the majority sex for
# that name.
#
# as.data.table() converts a copy. setDT() would convert the object exported
# by the babynames package by reference, which can change that dataset for
# the rest of the session.
babynames <- babynames::babynames %>%
  as.data.table() %>%
  dcast(year + name ~ sex, value.var = "n", fill = 0) %>%
  .[, odds := (0.5 + abs(F / (M + F) - 0.5))]
# Yearly national trend, weighting each name's `odds` by the number of births
# with that name (F + M). Names carried by more people therefore count more.
babynames %>%
  .[, .(ratio = weighted.mean(odds, F + M)), by = .(year)] %>%
  ggplot(aes(year, ratio)) +
  geom_line() +
  hrbrthemes::theme_ipsum_rc() +
  labs(x = "Year of birth", y = NULL,
       title = "Probability of correctly deducing the sex of a random person\nby knowing their name",
       subtitle = "More people are named with more gender-neutral (-ish) names.",
       caption = "source: babynames package -> US SSA")
# Yearly national trend using the plain (unweighted) mean of `odds`, so every
# distinct name counts equally regardless of how many babies received it.
# NOTE(review): with an unweighted mean this describes a random *name* rather
# than a random *person*; confirm the title wording is intended.
babynames %>%
  .[, .(ratio = mean(odds)), by = .(year)] %>%
  ggplot(aes(year, ratio)) +
  geom_line() +
  hrbrthemes::theme_ipsum_rc() +
  labs(x = "Year of birth", y = NULL,
       title = "Probability of correctly deducing the sex of a random person\nby knowing their name",
       subtitle = "More people are birthed with more gender-neutral (-ish) names.")
library(geofacet) | |
# Per-state trend since 1950: the birth-weighted mean of `odds` for each
# (year, state) pair, drawn as one line panel per state with facet_geo().
# The y axis is clipped from 0.97 upwards and only the 1950 / 2010 and
# 0.97 / 1 breaks are labelled.
ggplot(
  bb_state[
    year >= 1950,
    .(ratio = weighted.mean(odds, F + M)),
    by = c("year", "state")
  ],
  aes(x = year, y = ratio)
) +
  geom_line() +
  facet_geo(~ state, label = "code") +
  scale_x_continuous(breaks = c(1950, 2010)) +
  scale_y_continuous(breaks = c(0.97, 1)) +
  coord_cartesian(ylim = c(0.97, NA)) +
  theme_minimal() +
  labs(caption = "source: US SSA", x = "Year of birth", y = NULL)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment