jonspring · December 19, 2019 05:56
diff --git a/TidyTuesday 2019 #51: Dog namez, hooman namez b/TidyTuesday 2019 #51: Dog namez, hooman namez
 # Download the three CSV tables published for the 2019-12-17 TidyTuesday
 # release. Only `dog_descriptions` is referenced by the code that follows
 # in this script; the other two are loaded for completeness.
 dog_descriptions <- readr::read_csv(
   "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_descriptions.csv"
 )
 dog_travel <- readr::read_csv(
   "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_travel.csv"
 )
 dog_moves <- readr::read_csv(
   "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_moves.csv"
 )

 library(tidyverse)
 library(tidytext)
 library(lubridate)

 # Keep only the columns used in the name analysis.
 # (`size` was listed twice in the original selection; once is enough.)
 doggo_names <- dog_descriptions %>%
   select(name, breed_primary, size, sex, contact_state, contact_zip, posted)

 # Quick look: number of listings per sex.
 doggo_names %>%
   count(sex)

 # Weekly count of listings posted since the start of 2019.
 doggo_names %>%
   mutate(posted = ymd_hms(posted)) %>%
   # The cutoff must be a date-time, not a Date: `ymd()` without `tz` returns
   # a Date, and `POSIXct >= Date` compares seconds against days, so the
   # filter never removed anything. Passing `tz` makes `ymd()` return POSIXct
   # ("UTC" matches the default time zone of `ymd_hms()`).
   filter(posted >= ymd(20190101, tz = "UTC")) %>%
   count(week = lubridate::floor_date(posted, "1 week")) %>%
   ggplot(aes(week, n)) +
   geom_line()
  

 library(babynames)

 # Per (name, sex) summary of the babynames table:
 #   amer       - total of `n` over all years
 #   mean_birth - mean of `year` weighted by `n`
 name_ages <- babynames::babynames %>%
   group_by(name, sex) %>%
   summarise(amer = sum(n),
             mean_birth = sum(year * n) / amer) %>%
   # summarise() drops only the last grouping level, so the result would
   # still be grouped by `name`. Ungroup so the recode below runs once over
   # the whole column and the stored table carries no leftover grouping.
   ungroup() %>%
   # Spell out the sex codes ("M" -> "Male", anything else -> "Female");
   # presumably to match the labels in the dog data - confirm against
   # dog_descriptions$sex.
   mutate(sex = if_else(sex == "M", "Male", "Female"))
  

 # Count dogs per name and sex, keep names carried by at least 20 dogs, and
 # attach the human-name statistics from `name_ages`.
 joined <- doggo_names %>%
   # First word of the name, letters only, in Title Case.
   mutate(name_1st = word(name, 1) %>%
            str_extract("[:alpha:]+") %>%
            str_to_title()) %>%
   count(name_1st, name, sex, sort = TRUE) %>%
   filter(n >= 20) %>%
   # NOTE(review): the join keys are the raw `name` plus `sex` (the only
   # columns the two tables share); `name_1st` is computed above but is not
   # used for matching - confirm that is intended.
   left_join(name_ages, by = c("name", "sex")) %>%
   # Names with no human match come back with amer = NA. pmax() propagates NA
   # unless na.rm = TRUE, so without it the floor of 1 was never applied and
   # those names dropped off the log-scaled plot.
   mutate(amer = pmax(1, amer, na.rm = TRUE)) %>%
   arrange(-n)

 # Dog count vs. human count per name, both on log scales.
 ggplot(joined, aes(n, amer, color = sex)) +
   geom_point() +
   scale_y_log10() +
   scale_x_log10()


 # Build a left-aligned text annotation for the plot.
 #
 # Arguments:
 #   x, y    Position of the label, passed straight to annotate().
 #   label   Text to draw.
 #   family  Font family for the text. The original body read a global `ft`
 #           that this script never defines, so every call failed with
 #           "object 'ft' not found". It is now an argument whose default
 #           matches the `base_family` used by the plot theme.
 #
 # Returns a one-element list holding the annotate() layer, so the result
 # can be added to a ggplot with `+`.
 note <- function(x = 1925, y = 5, label = "test", family = "Impact") {
   list(annotate("text", x = x, y = y, label = label,
                 family = family, size = 7, hjust = 0))
 }

 # Final chart: each name is drawn as text at
 #   x = mean birth year of humans with the name,
 #   y = humans per dog with the name (log scale).
 ggplot(joined, aes(mean_birth, amer / n, size = n,
                    fill = sex, color = sex, label = name)) +
   # Alternative layers tried during exploration:
   # geom_smooth(se = F, method = "lm") +
   # ggrepel::geom_text_repel(size = 4) +
   # geom_point(alpha = 0.5) +
   geom_text(size = 3, vjust = 0.5, check_overlap = TRUE) +
   # Corner captions explaining what each direction means.
   note(1955, 85000, "moar hoomans") +
   note(1958, 0.1, "moar doggos") +
   note(1905, 100, "oldr namez") +
   note(2015, 100, "noo namez") +
   scale_size_area() +
   # Hide all legends. "none" replaces the deprecated `FALSE` shorthand.
   guides(fill = "none", color = "none", size = "none") +
   scale_y_log10(breaks = NULL, name = "") +
   # 20 * 97:100 gives breaks at 1940, 1960, 1980 and 2000.
   scale_x_continuous(name = "Avg date of birth for Americans with this name",
                      breaks = 20 * 97:100, minor_breaks = NULL) +
   ggthemes::scale_color_tableau(direction = -1) +
   # clip = "off" lets text run past the panel edge; xlim leaves room on the
   # right for labels.
   coord_cartesian(clip = "off", xlim = c(1910, 2032)) +
   theme_minimal(base_size = 20, base_family = "Impact") +
   labs(title = "DOG NAME MEMEZ",
        subtitle = str_wrap(width = 100,
                            "TidyTuesday #51 of 2019, looking at the most common dog names in a PetFinder database of 58,000 dogs in the USA. The names were compared to human names from Social Security data available in the <babynames> package, to identify which names were proportionally more common in dogs or people, and to distinguish between older and newer names.")) +
   theme(plot.subtitle = element_text(family = "Helvetica", size = 8),
         axis.text.x = element_text(color = "gray85"),
         panel.grid = element_blank(),
         axis.title.x = element_text(color = "gray85", size = 13))

 # No `plot` argument is given, so ggsave() writes the most recent plot.
 ggsave("doggo.png", width = 7, height = 5, dpi = 300)
	# Download the three CSV tables published for the 2019-12-17 TidyTuesday
	# release. Only `dog_descriptions` is referenced by the code that follows
	# in this script; the other two are loaded for completeness.
	dog_descriptions <- readr::read_csv(
	  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_descriptions.csv"
	)
	dog_travel <- readr::read_csv(
	  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_travel.csv"
	)
	dog_moves <- readr::read_csv(
	  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-12-17/dog_moves.csv"
	)

	library(tidyverse)
	library(tidytext)
	library(lubridate)

	# Keep only the columns used in the name analysis.
	# (`size` was listed twice in the original selection; once is enough.)
	doggo_names <- dog_descriptions %>%
	  select(name, breed_primary, size, sex, contact_state, contact_zip, posted)

	# Quick look: number of listings per sex.
	doggo_names %>%
	  count(sex)

	# Weekly count of listings posted since the start of 2019.
	doggo_names %>%
	  mutate(posted = ymd_hms(posted)) %>%
	  # The cutoff must be a date-time, not a Date: `ymd()` without `tz` returns
	  # a Date, and `POSIXct >= Date` compares seconds against days, so the
	  # filter never removed anything. Passing `tz` makes `ymd()` return POSIXct
	  # ("UTC" matches the default time zone of `ymd_hms()`).
	  filter(posted >= ymd(20190101, tz = "UTC")) %>%
	  count(week = lubridate::floor_date(posted, "1 week")) %>%
	  ggplot(aes(week, n)) +
	  geom_line()


	library(babynames)

	# Per (name, sex) summary of the babynames table:
	#   amer       - total of `n` over all years
	#   mean_birth - mean of `year` weighted by `n`
	name_ages <- babynames::babynames %>%
	  group_by(name, sex) %>%
	  summarise(amer = sum(n),
	            mean_birth = sum(year * n) / amer) %>%
	  # summarise() drops only the last grouping level, so the result would
	  # still be grouped by `name`. Ungroup so the recode below runs once over
	  # the whole column and the stored table carries no leftover grouping.
	  ungroup() %>%
	  # Spell out the sex codes ("M" -> "Male", anything else -> "Female");
	  # presumably to match the labels in the dog data - confirm against
	  # dog_descriptions$sex.
	  mutate(sex = if_else(sex == "M", "Male", "Female"))


	# Count dogs per name and sex, keep names carried by at least 20 dogs, and
	# attach the human-name statistics from `name_ages`.
	joined <- doggo_names %>%
	  # First word of the name, letters only, in Title Case.
	  mutate(name_1st = word(name, 1) %>%
	           str_extract("[:alpha:]+") %>%
	           str_to_title()) %>%
	  count(name_1st, name, sex, sort = TRUE) %>%
	  filter(n >= 20) %>%
	  # NOTE(review): the join keys are the raw `name` plus `sex` (the only
	  # columns the two tables share); `name_1st` is computed above but is not
	  # used for matching - confirm that is intended.
	  left_join(name_ages, by = c("name", "sex")) %>%
	  # Names with no human match come back with amer = NA. pmax() propagates NA
	  # unless na.rm = TRUE, so without it the floor of 1 was never applied and
	  # those names dropped off the log-scaled plot.
	  mutate(amer = pmax(1, amer, na.rm = TRUE)) %>%
	  arrange(-n)

	# Dog count vs. human count per name, both on log scales.
	ggplot(joined, aes(n, amer, color = sex)) +
	  geom_point() +
	  scale_y_log10() +
	  scale_x_log10()


	# Build a left-aligned text annotation for the plot.
	#
	# Arguments:
	#   x, y    Position of the label, passed straight to annotate().
	#   label   Text to draw.
	#   family  Font family for the text. The original body read a global `ft`
	#           that this script never defines, so every call failed with
	#           "object 'ft' not found". It is now an argument whose default
	#           matches the `base_family` used by the plot theme.
	#
	# Returns a one-element list holding the annotate() layer, so the result
	# can be added to a ggplot with `+`.
	note <- function(x = 1925, y = 5, label = "test", family = "Impact") {
	  list(annotate("text", x = x, y = y, label = label,
	                family = family, size = 7, hjust = 0))
	}

	# Final chart: each name is drawn as text at
	#   x = mean birth year of humans with the name,
	#   y = humans per dog with the name (log scale).
	ggplot(joined, aes(mean_birth, amer / n, size = n,
	                   fill = sex, color = sex, label = name)) +
	  # Alternative layers tried during exploration:
	  # geom_smooth(se = F, method = "lm") +
	  # ggrepel::geom_text_repel(size = 4) +
	  # geom_point(alpha = 0.5) +
	  geom_text(size = 3, vjust = 0.5, check_overlap = TRUE) +
	  # Corner captions explaining what each direction means.
	  note(1955, 85000, "moar hoomans") +
	  note(1958, 0.1, "moar doggos") +
	  note(1905, 100, "oldr namez") +
	  note(2015, 100, "noo namez") +
	  scale_size_area() +
	  # Hide all legends. "none" replaces the deprecated `FALSE` shorthand.
	  guides(fill = "none", color = "none", size = "none") +
	  scale_y_log10(breaks = NULL, name = "") +
	  # 20 * 97:100 gives breaks at 1940, 1960, 1980 and 2000.
	  scale_x_continuous(name = "Avg date of birth for Americans with this name",
	                     breaks = 20 * 97:100, minor_breaks = NULL) +
	  ggthemes::scale_color_tableau(direction = -1) +
	  # clip = "off" lets text run past the panel edge; xlim leaves room on the
	  # right for labels.
	  coord_cartesian(clip = "off", xlim = c(1910, 2032)) +
	  theme_minimal(base_size = 20, base_family = "Impact") +
	  labs(title = "DOG NAME MEMEZ",
	       subtitle = str_wrap(width = 100,
	                           "TidyTuesday #51 of 2019, looking at the most common dog names in a PetFinder database of 58,000 dogs in the USA. The names were compared to human names from Social Security data available in the <babynames> package, to identify which names were proportionally more common in dogs or people, and to distinguish between older and newer names.")) +
	  theme(plot.subtitle = element_text(family = "Helvetica", size = 8),
	        axis.text.x = element_text(color = "gray85"),
	        panel.grid = element_blank(),
	        axis.title.x = element_text(color = "gray85", size = 13))

	# No `plot` argument is given, so ggsave() writes the most recent plot.
	ggsave("doggo.png", width = 7, height = 5, dpi = 300)
No results found