Skip to content

Instantly share code, notes, and snippets.

@eliocamp
Created October 28, 2019 00:41
Show Gist options
  • Save eliocamp/b3fa57e87ae7c48df6ec2bdd43fe8121 to your computer and use it in GitHub Desktop.
Save eliocamp/b3fa57e87ae7c48df6ec2bdd43fe8121 to your computer and use it in GitHub Desktop.
Trends in gender neutral names in the US
library(ggplot2)
library(data.table)
library(magrittr)
# Fetch the state-level baby-name counts and unpack them locally.
# From https://www.ssa.gov/oact/babynames/limits.html
file <- "https://www.ssa.gov/oact/babynames/state/namesbystate.zip"
dir <- "/home/elio/Downloads/babies"
# The archive is stored next to the extraction directory, as "<dir>.zip".
destination <- paste0(dir, ".zip")
# Create the target directory (and any missing parents) up front, so the
# download has somewhere to land and re-running the script does not warn
# about a directory that already exists.
dir.create(dir, showWarnings = FALSE, recursive = TRUE)
# mode = "wb": the zip archive is binary and must not be written in text mode.
# download.file() reports failure through a non-zero status, so check it
# instead of trying to unzip a missing or partial file.
if (download.file(file, destination, mode = "wb") != 0L) {
  stop("Could not download ", file, call. = FALSE)
}
utils::unzip(destination, overwrite = TRUE, exdir = dir)
# Build one table with a row per state/year/name and the female (F) and
# male (M) counts side by side.
#
# `dir` is the extraction directory used for the unzip step; reusing it
# avoids keeping a second hard-coded copy of the path in sync.
# `pattern` is a regular expression (not a glob), so anchor on the extension.
# NOTE(review): header = FALSE assumes the SSA state files carry no header
# row (the columns were previously all renamed after reading) -- confirm.
bb_state <- list.files(dir, full.names = TRUE, pattern = "\\.TXT$") %>%
  lapply(
    fread,
    header = FALSE,
    col.names = c("state", "sex", "year", "name", "n"),
    # Pin the types instead of relying on per-file type detection.
    colClasses = c(
      "character", "character", "integer", "character", "integer"
    )
  ) %>%
  rbindlist() %>%
  # Names recorded for only one sex get a count of 0 for the other.
  dcast(state + year + name ~ sex, value.var = "n", fill = 0) %>%
  # With p = F / (M + F), this equals max(p, 1 - p): the chance of guessing
  # the sex right by always picking the majority sex for that name.
  .[, odds := (0.5 + abs(F / (M + F) - 0.5))]
# National counts from the babynames package, reshaped to one row per
# year/name with the female (F) and male (M) counts side by side.
babynames <- babynames::babynames %>%
  # as.data.table() works on a copy; setDT() would convert the package's
  # own dataset by reference.
  as.data.table() %>%
  # Names recorded for only one sex get a count of 0 for the other.
  dcast(year + name ~ sex, value.var = "n", fill = 0) %>%
  # With p = F / (M + F), this equals max(p, 1 - p): the chance of guessing
  # the sex right by always picking the majority sex for that name.
  .[, odds := (0.5 + abs(F / (M + F) - 0.5))]
# National trend, weighted by the number of babies: each name counts in
# proportion to how many people carry it, so the yearly value is the chance
# of guessing right for a randomly drawn person.
babynames %>%
  .[, .(ratio = weighted.mean(odds, F + M)), by = .(year)] %>%
  ggplot(aes(year, ratio)) +
  geom_line() +
  hrbrthemes::theme_ipsum_rc() +
  labs(
    x = "Year of birth", y = NULL,
    title = "Probability of correctly deducing the sex of a random person\nby knowing their name",
    subtitle = "More people are named with more gender-neutral (-ish) names.",
    caption = "source: babynames package -> US SSA"
  )
# National trend, unweighted: every distinct name counts once per year
# regardless of how many babies received it, so the yearly value describes
# a randomly chosen name rather than a randomly chosen person. The title
# says so, to keep this plot distinguishable from the baby-weighted one.
babynames %>%
  .[, .(ratio = mean(odds)), by = .(year)] %>%
  ggplot(aes(year, ratio)) +
  geom_line() +
  hrbrthemes::theme_ipsum_rc() +
  labs(
    x = "Year of birth", y = NULL,
    title = "Probability of correctly deducing the sex associated with\na randomly chosen name",
    subtitle = "More people are birthed with more gender-neutral (-ish) names."
  )
library(geofacet)
# Baby-weighted trend per state for births from 1950 onwards, drawn as one
# small panel per state via facet_geo().
ggplot(
  bb_state[
    year >= 1950,
    .(ratio = weighted.mean(odds, F + M)),
    by = .(year, state)
  ],
  aes(x = year, y = ratio)
) +
  geom_line() +
  facet_geo(~ state, label = "code") +
  # Only the end points are labelled on each axis.
  scale_x_continuous(breaks = c(1950, 2010)) +
  scale_y_continuous(breaks = c(0.97, 1)) +
  coord_cartesian(ylim = c(0.97, NA)) +
  labs(
    x = "Year of birth",
    y = NULL,
    caption = "source: US SSA"
  ) +
  theme_minimal()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment