Last active
December 19, 2019 05:56
-
-
Save jonspring/a8e3a77b5097af34f0473c7303b7277e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used throughout the script ----
library(tidyverse)
library(tidytext)
library(lubridate)

# Data ----
# TidyTuesday 2019-12-17 CSV files, read directly from the
# rfordatascience/tidytuesday GitHub repository (network access required).
dog_moves <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_moves.csv")
dog_travel <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_travel.csv")
dog_descriptions <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_descriptions.csv")
# Subset of `dog_descriptions` holding only the columns used by the name
# analysis below (`size` was listed twice in the original selection).
doggo_names <- dog_descriptions %>%
  select(name, breed_primary, size, sex, contact_state, contact_zip, posted)

# Quick look at how many rows fall under each value of `sex`.
doggo_names %>%
  count(sex)
# Line chart of the number of rows per week of `posted`, from 2019 onward.
doggo_names %>%
  mutate(posted = ymd_hms(posted)) %>%
  # The cutoff is built as a date-time (tz given), not a Date: `>=` between a
  # date-time column and a Date does not convert units, so a Date cutoff
  # would fail to drop the pre-2019 rows.
  filter(posted >= ymd("2019-01-01", tz = "UTC")) %>%
  count(week = floor_date(posted, "1 week")) %>%
  ggplot(aes(week, n)) +
  geom_line()
library(babynames)

# One row per (name, sex) in the babynames data:
#   amer       - `n` summed over all years
#   mean_birth - mean of `year` weighted by `n`
name_ages <- babynames::babynames %>%
  group_by(name, sex) %>%
  summarise(
    amer = sum(n),
    # `amer` here is the total computed on the line above.
    mean_birth = sum(year * n) / amer
  ) %>%
  # summarise() drops only the last grouping level; remove the rest so later
  # verbs operate on the whole table.
  ungroup() %>%
  # Spell out the sex codes; presumably this matches the `sex` labels in the
  # dog data so the two tables can be joined on it -- TODO confirm.
  mutate(sex = if_else(sex == "M", "Male", "Female"))
# Dog-name counts (names appearing at least 20 times) joined to the
# babynames summary in `name_ages`.
joined <- doggo_names %>%
  mutate(
    # First word of the name, letters only, in Title Case.
    name_1st = word(name, 1) %>%
      str_extract("[:alpha:]+") %>%
      str_to_title()
  ) %>%
  count(name_1st, name, sex, sort = TRUE) %>%
  filter(n >= 20) %>%
  # Explicit version of the key the original natural join resolved to (the
  # columns shared by both tables).
  # NOTE(review): `name_1st` is computed above but is not the join key; if
  # the cleaned name was meant to be matched, use
  # by = c("name_1st" = "name", "sex") instead -- confirm intent.
  left_join(name_ages, by = c("name", "sex")) %>%
  # Floor `amer` at 1. na.rm = TRUE makes names with no babynames match get 1
  # rather than staying NA (pmax() otherwise propagates NA).
  mutate(amer = pmax(1, amer, na.rm = TRUE)) %>%
  arrange(desc(n))
# Log-log scatter of dog-name count `n` (x) against babynames total `amer`
# (y), coloured by `sex`.
ggplot(joined, aes(n, amer, color = sex)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()
#' Build a left-aligned text annotation that can be added to a ggplot.
#'
#' @param x,y Position of the text, passed to `annotate()` as `x` and `y`.
#' @param label Text to draw.
#' @param family Font family for the text. Replaces the global `ft` the body
#'   previously read, which is not defined anywhere in this script; the
#'   default matches the `base_family` given to `theme_minimal()` in the
#'   final plot.
#' @return A one-element list containing the `annotate("text", ...)` result.
note <- function(x = 1925, y = 5, label = "test", family = "Impact") {
  list(
    annotate(
      "text",
      x = x, y = y, label = label,
      family = family, size = 7, hjust = 0
    )
  )
}
# Final "meme" chart: each name is drawn as text at
#   x = mean_birth, y = amer / n (log scale),
# with four corner captions added through note(). The plot is kept in a
# variable so that ggsave() writes exactly this plot rather than whatever
# happens to be the last plot.
doggo_plot <- ggplot(
  joined,
  aes(mean_birth, amer / n, size = n, fill = sex, color = sex, label = name)
) +
  # geom_smooth(se = F, method = "lm") +
  # ggrepel::geom_text_repel(size = 4) +
  # geom_point(alpha = 0.5) +
  geom_text(size = 3, vjust = 0.5, check_overlap = TRUE) +
  note(1955, 85000, "moar hoomans") +
  note(1958, 0.1, "moar doggos") +
  note(1905, 100, "oldr namez") +
  note(2015, 100, "noo namez") +
  scale_size_area() +
  # "none" is the supported way to suppress a legend (FALSE is deprecated).
  guides(fill = "none", color = "none", size = "none") +
  scale_y_log10(breaks = NULL, name = "") +
  scale_x_continuous(
    name = "Avg date of birth for Americans with this name",
    breaks = seq(1940, 2000, by = 20),
    minor_breaks = NULL
  ) +
  ggthemes::scale_color_tableau(direction = -1) +
  # clip = "off" lets text extend past the panel edge.
  coord_cartesian(clip = "off", xlim = c(1910, 2032)) +
  theme_minimal(base_size = 20, base_family = "Impact") +
  labs(
    title = "DOG NAME MEMEZ",
    subtitle = str_wrap(
      width = 100,
      "TidyTuesday #51 of 2019, looking at the most common dog names in a PetFinder database of 58,000 dogs in the USA. The names were compared to human names from Social Security data available in the <babynames> package, to identify which names were proportionally more common in dogs or people, and to distinguish between older and newer names."
    )
  ) +
  theme(
    plot.subtitle = element_text(family = "Helvetica", size = 8),
    axis.text.x = element_text(color = "gray85"),
    panel.grid = element_blank(),
    axis.title.x = element_text(color = "gray85", size = 13)
  )

# Display the plot, then write it to disk.
doggo_plot
ggsave("doggo.png", plot = doggo_plot, width = 7, height = 5, dpi = 300)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment