Skip to content

Instantly share code, notes, and snippets.

@tysonwepprich
Created March 23, 2022 20:04
Show Gist options
  • Save tysonwepprich/c7b3debc29c46d0780d081fcebc8e84e to your computer and use it in GitHub Desktop.
Save tysonwepprich/c7b3debc29c46d0780d081fcebc8e84e to your computer and use it in GitHub Desktop.
Babynames Elo rating system
# Install these packages if you don't have them
library(dplyr)
library(tidyr)
library(ggplot2)
library(elo)
library(ggrepel)
'%!in%' <- function(x,y)!('%in%'(x,y))
# babynames <- read.csv("data/national.csv", header = TRUE, stringsAsFactors = FALSE) # From SS admin download
babynames <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-03-22/babynames.csv')
# Choose filter for your set of names here
babyset <- babynames %>%
filter(year > 2000 & year <= 2018) %>% # choose which years to use (1880-2018)
group_by(name) %>%
summarise(numb = sum(n),
propF = sum(n[sex == "F"])/numb) %>%
filter(propF >= 0 & propF <= .8) %>% # choose sex (proportion females), here excluding names 80% or more female
arrange(-numb) %>%
mutate(usrank = row_number()) %>%
filter(usrank <= 400 & usrank >= 300) # choose rank (1 most popular, will change with the other filters you choose)
# Show your set of names to rank
print(sort(babyset$name))
# how should pairs be presented?
# random seems inefficient, but start there
# could sort by unasked pairs, or rearrange by Elo after so many rounds
# bump out names with lots of losses (instead of button)?
# Sets up the one vs one matches
allpairs <- expand.grid(team.Home = unique(babyset$name), team.Visitor = unique(babyset$name)) %>%
filter(team.Home != team.Visitor) %>%
mutate_if(is.factor, as.character) %>%
mutate(qorder = sample(nrow(.), size = nrow(.), replace = FALSE),
asked = 0) %>%
arrange(qorder)
outdf <- data.frame(team.Home = NA, team.Visitor = NA, match = NA, wins.A = NA, query = NA, stringsAsFactors = FALSE)
graveyard <- data.frame(name = NA, match = NA, stringsAsFactors = FALSE)
# Run this "while" loop to make choices pop up in the console
# Press 0 to exit loop when you're sick of it and want to see results
a <- 1
match <- 1
while(a > 0){
choice1 <- allpairs[which(allpairs$asked == 0), 1][1]
choice2 <- allpairs[which(allpairs$asked == 0), 2][1]
query <- menu(c(choice1, choice2, "Tie", "Never #1", "Never #2", "Remove both"), title="Pick the better name (press 0 to stop and rank)")
wins.A <- case_when(query %in% c(1, 5) ~ 1,
query %in% c(2, 4) ~ 0,
query %in% c(3, 6) ~ 0.5)
outdf[match, ] <- data.frame(team.Home = choice1, team.Visitor = choice2,
match = match, wins.A = wins.A, answer = query, stringsAsFactors = FALSE)
# update allpairs
allpairs$asked[which(allpairs$asked == 0)][1] <- 1
if(query == 4){
allpairs$asked[which(allpairs$team.Home == choice1 | allpairs$team.Visitor == choice1)] <- 1
graveyard[(nrow(graveyard)+1), ] <- data.frame(name = choice1, match = match, stringsAsFactors = FALSE)
}
if(query == 5){
allpairs$asked[which(allpairs$team.Home == choice2 | allpairs$team.Visitor == choice2)] <- 1
graveyard[(nrow(graveyard)+1), ] <- data.frame(name = choice2, match = match, stringsAsFactors = FALSE)
}
if(query == 6){
allpairs$asked[which(allpairs$team.Home == choice1 | allpairs$team.Visitor == choice1 | allpairs$team.Home == choice2 | allpairs$team.Visitor == choice2)] <- 1
graveyard[(nrow(graveyard)+1):(nrow(graveyard)+2), ] <- data.frame(name = c(choice1, choice2), match = c(match, match), stringsAsFactors = FALSE)
}
a <- query
match <- match + 1
print(paste(length(which(allpairs$asked == 0)), "matches to go"))
}
# Now get results
matchups <- outdf %>% filter(complete.cases(.))
e <- elo.run(wins.A ~ team.Home + team.Visitor, data = matchups, k = 50)
summary(e)
sort(rank.teams(e)[-which(names(rank.teams(e)) %in% graveyard$name)]) # YOUR BABY'S NAME, SPOILER
head(as.matrix(e))
str(as.data.frame(e))
# final.elos(e)
# matplot(as.matrix(e), type = "l")
# Set up data to plot
dat <- as.data.frame(as.matrix(e)) %>%
mutate(match = row_number()) %>%
pivot_longer(-match, names_to = "name", values_to = "elo")
maxmatch <- max(dat$match)
minelo <- min(dat$elo, na.rm = TRUE)
labeldat <- matchups[, c("team.Home", "match")] %>%
bind_rows(matchups[, c("team.Visitor", "match")]) %>%
rowwise() %>%
mutate(name = ifelse(is.na(team.Home), team.Visitor, team.Home)) %>%
left_join(dat) %>%
group_by(name) %>%
summarise(endscore = elo[order(-match)][1],
xmatch = ifelse(name[1] %in% graveyard$name, max(match, na.rm = TRUE), maxmatch))
labeldat$endscore[which(labeldat$name %in% graveyard$name)] <- rep(seq(minelo-110, minelo-10, by = 10), nrow(labeldat))
linedat <- dat %>%
filter(name %!in% graveyard$name)
theme_set(theme_bw(base_size = 14))
# Plots score over time (matches) and shows graveyard of rejected names along bottom.
ggplot(linedat, aes(x = match, y = elo, group = name, color = name)) +
geom_line() +
scale_x_continuous(limits = c(0, maxmatch * 1.1)) +
annotate(geom="text", x = maxmatch/2, y = 1400, label = "GRAVEYARD") +
ylab("Elo rating") + xlab("Matchup") +
geom_text_repel(data = labeldat, aes(x = xmatch, y = endscore, label = name), inherit.aes = FALSE, nudge_x = maxmatch * 0.01) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none") +
ggtitle("Elo rating progression for 100 names in head-to-head matchups")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment