tysonwepprich · March 23, 2022 20:04
diff --git a/babyELO.R b/babyELO.R
 # Install these packages if you don't have them
 library(dplyr)
 library(tidyr)
 library(ggplot2)
 library(elo)
 library(ggrepel)
 '%!in%' <- function(x,y)!('%in%'(x,y))

 # babynames <- read.csv("data/national.csv", header = TRUE, stringsAsFactors = FALSE) # From SS admin download
 babynames <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-03-22/babynames.csv')

 # Choose filter for your set of names here
 babyset <- babynames %>% 
  filter(year > 2000 & year <= 2018) %>% # choose which years to use (1880-2018)
  group_by(name) %>% 
  summarise(numb = sum(n),
            propF = sum(n[sex == "F"])/numb) %>% 
  filter(propF >= 0 & propF <= .8) %>% # choose sex (proportion females), here excluding names 80% or more female
  arrange(-numb) %>% 
  mutate(usrank = row_number()) %>% 
  filter(usrank <= 400 & usrank >= 300) # choose rank (1 most popular, will change with the other filters you choose)

 # Show your set of names to rank
 print(sort(babyset$name))

 # how should pairs be presented?
 # random seems inefficient, but start there
 # could sort by unasked pairs, or rearrange by Elo after so many rounds
 # bump out names with lots of losses (instead of button)?

 # Sets up the one vs one matches
 allpairs <- expand.grid(team.Home = unique(babyset$name), team.Visitor = unique(babyset$name)) %>% 
  filter(team.Home != team.Visitor) %>%
  mutate_if(is.factor, as.character) %>% 
  mutate(qorder = sample(nrow(.), size = nrow(.), replace = FALSE),
         asked = 0) %>% 
  arrange(qorder)

 outdf <- data.frame(team.Home = NA, team.Visitor = NA, match = NA, wins.A = NA, query = NA, stringsAsFactors = FALSE)
 graveyard <- data.frame(name = NA, match = NA, stringsAsFactors = FALSE)

 # Run this "while" loop to make choices pop up in the console
 # Press 0 to exit loop when you're sick of it and want to see results
 a <- 1
 match <- 1
 while(a > 0){
  choice1 <- allpairs[which(allpairs$asked == 0), 1][1]
  choice2 <- allpairs[which(allpairs$asked == 0), 2][1]
  
  query <- menu(c(choice1, choice2, "Tie", "Never #1", "Never #2", "Remove both"), title="Pick the better name (press 0 to stop and rank)")
  wins.A <- case_when(query %in% c(1, 5) ~ 1, 
                      query %in% c(2, 4) ~ 0,
                      query %in% c(3, 6) ~ 0.5)
  outdf[match, ] <- data.frame(team.Home = choice1, team.Visitor = choice2,
                               match = match, wins.A = wins.A, answer = query, stringsAsFactors = FALSE)
  # update allpairs
  allpairs$asked[which(allpairs$asked == 0)][1] <- 1
  if(query == 4){
    allpairs$asked[which(allpairs$team.Home == choice1 | allpairs$team.Visitor == choice1)] <- 1
    graveyard[(nrow(graveyard)+1), ] <- data.frame(name = choice1, match = match, stringsAsFactors = FALSE)
  }
  if(query == 5){
    allpairs$asked[which(allpairs$team.Home == choice2 | allpairs$team.Visitor == choice2)] <- 1
    graveyard[(nrow(graveyard)+1), ] <- data.frame(name = choice2, match = match, stringsAsFactors = FALSE)
  }
  if(query == 6){
    allpairs$asked[which(allpairs$team.Home == choice1 | allpairs$team.Visitor == choice1 | allpairs$team.Home == choice2 | allpairs$team.Visitor == choice2)] <- 1
    graveyard[(nrow(graveyard)+1):(nrow(graveyard)+2), ] <- data.frame(name = c(choice1, choice2), match = c(match, match), stringsAsFactors = FALSE)
  }
  
  a <- query
  match <- match + 1
  print(paste(length(which(allpairs$asked == 0)), "matches to go"))
 }


 # Now get results
 matchups <- outdf %>% filter(complete.cases(.))

 e <- elo.run(wins.A ~ team.Home + team.Visitor, data = matchups, k = 50)

 summary(e)
 sort(rank.teams(e)[-which(names(rank.teams(e)) %in% graveyard$name)]) # YOUR BABY'S NAME, SPOILER
 head(as.matrix(e))
 str(as.data.frame(e))
 # final.elos(e)
 # matplot(as.matrix(e), type = "l")


 # Set up data to plot
 dat <- as.data.frame(as.matrix(e)) %>% 
  mutate(match = row_number()) %>% 
  pivot_longer(-match, names_to = "name", values_to = "elo")

 maxmatch <- max(dat$match)
 minelo <- min(dat$elo, na.rm = TRUE)

 labeldat <- matchups[, c("team.Home", "match")] %>%
  bind_rows(matchups[, c("team.Visitor", "match")]) %>% 
  rowwise() %>% 
  mutate(name = ifelse(is.na(team.Home), team.Visitor, team.Home)) %>% 
  left_join(dat) %>% 
  group_by(name) %>% 
  summarise(endscore = elo[order(-match)][1],
            xmatch = ifelse(name[1] %in% graveyard$name, max(match, na.rm = TRUE), maxmatch))

 labeldat$endscore[which(labeldat$name %in% graveyard$name)] <- rep(seq(minelo-110, minelo-10, by = 10), nrow(labeldat))
 linedat <- dat %>% 
  filter(name %!in% graveyard$name)

 theme_set(theme_bw(base_size = 14)) 

 # Plots score over time (matches) and shows graveyard of rejected names along bottom.
 ggplot(linedat, aes(x = match, y = elo, group = name, color = name)) +
  geom_line() +
  scale_x_continuous(limits = c(0, maxmatch * 1.1)) +
  annotate(geom="text", x = maxmatch/2, y = 1400, label = "GRAVEYARD") +
  ylab("Elo rating") + xlab("Matchup") +
  geom_text_repel(data = labeldat, aes(x = xmatch, y = endscore, label = name), inherit.aes = FALSE, nudge_x = maxmatch * 0.01) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "none") +
  ggtitle("Elo rating progression for 100 names in head-to-head matchups")
	# Install these packages if you don't have them
	library(dplyr)
	library(tidyr)
	library(ggplot2)
	library(elo)
	library(ggrepel)
	'%!in%' <- function(x,y)!('%in%'(x,y))

	# babynames <- read.csv("data/national.csv", header = TRUE, stringsAsFactors = FALSE) # From SS admin download
	babynames <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-03-22/babynames.csv')

	# Choose filter for your set of names here
	babyset <- babynames %>%
	filter(year > 2000 & year <= 2018) %>% # choose which years to use (1880-2018)
	group_by(name) %>%
	summarise(numb = sum(n),
	propF = sum(n[sex == "F"])/numb) %>%
	filter(propF >= 0 & propF <= .8) %>% # choose sex (proportion females), here excluding names 80% or more female
	arrange(-numb) %>%
	mutate(usrank = row_number()) %>%
	filter(usrank <= 400 & usrank >= 300) # choose rank (1 most popular, will change with the other filters you choose)

	# Show your set of names to rank
	print(sort(babyset$name))

	# how should pairs be presented?
	# random seems inefficient, but start there
	# could sort by unasked pairs, or rearrange by Elo after so many rounds
	# bump out names with lots of losses (instead of button)?

	# Sets up the one vs one matches
	allpairs <- expand.grid(team.Home = unique(babyset$name), team.Visitor = unique(babyset$name)) %>%
	filter(team.Home != team.Visitor) %>%
	mutate_if(is.factor, as.character) %>%
	mutate(qorder = sample(nrow(.), size = nrow(.), replace = FALSE),
	asked = 0) %>%
	arrange(qorder)

	outdf <- data.frame(team.Home = NA, team.Visitor = NA, match = NA, wins.A = NA, query = NA, stringsAsFactors = FALSE)
	graveyard <- data.frame(name = NA, match = NA, stringsAsFactors = FALSE)

	# Run this "while" loop to make choices pop up in the console
	# Press 0 to exit loop when you're sick of it and want to see results
	a <- 1
	match <- 1
	while(a > 0){
	choice1 <- allpairs[which(allpairs$asked == 0), 1][1]
	choice2 <- allpairs[which(allpairs$asked == 0), 2][1]

	query <- menu(c(choice1, choice2, "Tie", "Never #1", "Never #2", "Remove both"), title="Pick the better name (press 0 to stop and rank)")
	wins.A <- case_when(query %in% c(1, 5) ~ 1,
	query %in% c(2, 4) ~ 0,
	query %in% c(3, 6) ~ 0.5)
	outdf[match, ] <- data.frame(team.Home = choice1, team.Visitor = choice2,
	match = match, wins.A = wins.A, answer = query, stringsAsFactors = FALSE)
	# update allpairs
	allpairs$asked[which(allpairs$asked == 0)][1] <- 1
	if(query == 4){
	allpairs$asked[which(allpairs$team.Home == choice1 \| allpairs$team.Visitor == choice1)] <- 1
	graveyard[(nrow(graveyard)+1), ] <- data.frame(name = choice1, match = match, stringsAsFactors = FALSE)
	}
	if(query == 5){
	allpairs$asked[which(allpairs$team.Home == choice2 \| allpairs$team.Visitor == choice2)] <- 1
	graveyard[(nrow(graveyard)+1), ] <- data.frame(name = choice2, match = match, stringsAsFactors = FALSE)
	}
	if(query == 6){
	allpairs$asked[which(allpairs$team.Home == choice1 \| allpairs$team.Visitor == choice1 \| allpairs$team.Home == choice2 \| allpairs$team.Visitor == choice2)] <- 1
	graveyard[(nrow(graveyard)+1):(nrow(graveyard)+2), ] <- data.frame(name = c(choice1, choice2), match = c(match, match), stringsAsFactors = FALSE)
	}

	a <- query
	match <- match + 1
	print(paste(length(which(allpairs$asked == 0)), "matches to go"))
	}


	# Now get results
	matchups <- outdf %>% filter(complete.cases(.))

	e <- elo.run(wins.A ~ team.Home + team.Visitor, data = matchups, k = 50)

	summary(e)
	sort(rank.teams(e)[-which(names(rank.teams(e)) %in% graveyard$name)]) # YOUR BABY'S NAME, SPOILER
	head(as.matrix(e))
	str(as.data.frame(e))
	# final.elos(e)
	# matplot(as.matrix(e), type = "l")


	# Set up data to plot
	dat <- as.data.frame(as.matrix(e)) %>%
	mutate(match = row_number()) %>%
	pivot_longer(-match, names_to = "name", values_to = "elo")

	maxmatch <- max(dat$match)
	minelo <- min(dat$elo, na.rm = TRUE)

	labeldat <- matchups[, c("team.Home", "match")] %>%
	bind_rows(matchups[, c("team.Visitor", "match")]) %>%
	rowwise() %>%
	mutate(name = ifelse(is.na(team.Home), team.Visitor, team.Home)) %>%
	left_join(dat) %>%
	group_by(name) %>%
	summarise(endscore = elo[order(-match)][1],
	xmatch = ifelse(name[1] %in% graveyard$name, max(match, na.rm = TRUE), maxmatch))

	labeldat$endscore[which(labeldat$name %in% graveyard$name)] <- rep(seq(minelo-110, minelo-10, by = 10), nrow(labeldat))
	linedat <- dat %>%
	filter(name %!in% graveyard$name)

	theme_set(theme_bw(base_size = 14))

	# Plots score over time (matches) and shows graveyard of rejected names along bottom.
	ggplot(linedat, aes(x = match, y = elo, group = name, color = name)) +
	geom_line() +
	scale_x_continuous(limits = c(0, maxmatch * 1.1)) +
	annotate(geom="text", x = maxmatch/2, y = 1400, label = "GRAVEYARD") +
	ylab("Elo rating") + xlab("Matchup") +
	geom_text_repel(data = labeldat, aes(x = xmatch, y = endscore, label = name), inherit.aes = FALSE, nudge_x = maxmatch * 0.01) +
	theme(panel.grid.major = element_blank(),
	panel.grid.minor = element_blank(),
	legend.position = "none") +
	ggtitle("Elo rating progression for 100 names in head-to-head matchups")