Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created November 13, 2019 23:40
Show Gist options
  • Save thoughtfulbloke/1858d6bad4c9e06f700e84d7aa3e46a7 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/1858d6bad4c9e06f700e84d7aa3e46a7 to your computer and use it in GitHub Desktop.
Voting similarity among Bird of the Year 2019 candidates
library(Rtsne)
library(vroom)
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
library(ggrepel)
library(janitor)
# data from https://www.dragonfly.co.nz/news/2019-11-12-boty.html
# CC attribution 4 licence
# Read the raw ballot file; every row is treated as one voter's ballot.
BOTY <- vroom("BOTY-votes-2019.csv", delim = ",")

# Give each ballot a synthetic voter id, then stack the vote_1..vote_5
# columns so there is one row per (voter, choice slot).
ballots <- mutate(BOTY, voter = paste0("vote", row_number()))
long <- ballots %>%
  gather(key = "vorder", value = "vote", vote_1:vote_5) %>%
  arrange(voter, vorder)

# Lookup table from the bird name as written on the ballot (`vote`) to an
# identifier-safe version (`safe`). The NA entry produced by empty choice
# slots is removed after the clean names have been generated.
bird_names <- unique(long$vote)
birds <- data.frame(
  vote = bird_names,
  safe = make_clean_names(bird_names),
  stringsAsFactors = FALSE
) %>%
  filter(!is.na(vote))

# The inner join both attaches the safe name and drops the empty (NA) votes,
# because NA no longer has a row in `birds`.
longclean <- inner_join(long, birds, by = "vote")
# Count how often each unordered pair of birds appears on the same ballot.
#
# Within a ballot the safe names are sorted, so a given pair is always written
# the same way round ("first second", space separated). Ballots with fewer
# than five birds index past the end of their vector, which yields NA; paste()
# turns that into the literal text "NA", and those placeholder pairs are
# removed at the end.
slot_pairs <- combn(5, 2)  # the 10 index pairs (1,2), (1,3), ..., (4,5)
sorted_votes <- arrange(longclean, voter, safe)
pairs_per_ballot <- lapply(
  split(sorted_votes$safe, sorted_votes$voter),
  function(ballot) {
    paste(ballot[slot_pairs[1, ]], ballot[slot_pairs[2, ]])
  }
)
paired_clean <- tibble(
  vpair = as.character(unlist(pairs_per_ballot, use.names = FALSE))
) %>%
  count(vpair, sort = TRUE) %>%
  filter(!str_detect(vpair, fixed("NA")))
# Build the co-vote profile table: one row per bird (`species`) and, after the
# loop, one extra column per bird. The column named after bird B holds, for
# each row bird A, the share of B's co-voted pairs in which the partner was A
# (0 when A and B never shared a ballot).
results <- data.frame(species = unique(longclean$safe),
                      stringsAsFactors = FALSE)

# Split every "first second" pair label into its two members once, outside
# the loop. Comparing whole names (rather than searching for the name as a
# substring of the label) matters when one clean name is contained in another,
# e.g. a name like `kaka` inside `kakapo`: substring matching would pull the
# longer bird's pairs into the shorter bird's totals and mangle the partner
# name so it no longer joins.
pair_members <- str_split_fixed(paired_clean$vpair, fixed(" "), 2)

for (abird in results$species) {
  is_first <- pair_members[, 1] == abird
  is_second <- pair_members[, 2] == abird
  addenda <- paired_clean %>%
    # The partner is whichever member of the pair is not `abird`.
    mutate(species = ifelse(is_first, pair_members[, 2], pair_members[, 1])) %>%
    filter(is_first | is_second) %>%
    mutate(prop = n / sum(n)) %>%
    # A ballot naming the same bird twice produces a self-pair; as before it
    # counts towards the denominator but is not reported as a partner.
    filter(species != abird) %>%
    select(species, prop)
  results <- results %>%
    left_join(addenda, by = "species") %>%
    mutate(prop = ifelse(is.na(prop), 0, prop))
  # The joined column arrives as `prop`; rename it to the bird it describes.
  names(results)[ncol(results)] <- abird
}
# Project the co-vote profiles (every column except `species`) to 2-D with
# exact t-SNE (theta = 0).
#
# Rtsne stops with "perplexity is too large" unless
# 3 * perplexity <= nrow - 1, so the preferred value of 28 is capped at the
# largest perplexity the current number of birds allows. With 85 or more rows
# this is still 28.
max_perplexity <- floor((nrow(results) - 1) / 3)
tsne <- Rtsne(results[, 2:ncol(results)],
              dims = 2,
              perplexity = min(28, max_perplexity),
              verbose = FALSE,
              theta = 0.0,
              max_iter = 500,
              check_duplicates = FALSE)

# Rtsne returns the embedding as the matrix `Y`; as.data.frame() names its
# columns V1 and V2. Rows are in the same order as `results`, so the safe
# names can be attached positionally and then mapped back to ballot names.
tsne2D <- as.data.frame(tsne$Y)
tsne2D$safe <- results$species
df <- tsne2D %>% inner_join(birds, by = "safe")
# Scatter plot of the 2-D embedding, one labelled point per bird. The axes of
# a t-SNE embedding carry no units, hence theme_void(); ggrepel keeps the
# labels from overlapping.
ggplot(df, aes(x = V1, y = V2, label = vote)) +
  theme_void() +
  ggtitle("2 dimensional (t-SNE) representation of similarity by Bird of the Year 2019 covotes") +
  geom_point(size = 1.2) +
  geom_text_repel(size = 2.5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment