Created
January 6, 2022 21:37
-
-
Save scarpino/eb2f58745bab14417a6bca4955cf1f9c to your computer and use it in GitHub Desktop.
An R script that scrapes the Wordle dictionaries and builds a network of word distances. You can use this to find the best starting word for the game.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SV Scarpino
# Playing Wordle
# Jan 6th 2022
#
# Reads the Wordle word lists out of the game's JavaScript bundle, builds a
# weighted network of the goal words from their pairwise string distances,
# and reports the word with the highest eigenvector centrality.

###########
#Libraries#
###########
library(rvest)       # read_html(), html_text()
library(stringdist)  # stringdistmatrix()
library(igraph)      # graph construction, strength(), eigen_centrality()
library(dplyr)       # only the %>% pipe is used from here in this script

#########
#Globals#
#########
# Set to TRUE to also render the network to "wordle_net.png".
do_plot <- FALSE
######
#Data#
######
# Location of the JavaScript bundle that embeds both word lists.
wordle_js_url <- "https://www.powerlanguage.co.uk/wordle/main.db1931a8.js"

# Extract one word list from the "]"-separated pieces of the dictionary
# statement.
#
# pieces: character vector produced by splitting the statement on "]".
# opener: literal text that introduces the list, e.g. "var Aa=[".
#
# Returns a character vector of words with every non-alphanumeric character
# (quotes, brackets, whitespace) removed. Stops with a descriptive error when
# the opener is not found exactly once or when no words follow it, instead of
# failing later with an opaque subscript error.
extract_word_list <- function(pieces, opener) {
  hit <- grep(pattern = opener, x = pieces, fixed = TRUE)
  if (length(hit) != 1L) {
    stop(
      "Expected exactly one piece containing '", opener, "' but found ",
      length(hit), "; the layout of the Wordle bundle may have changed.",
      call. = FALSE
    )
  }
  piece <- pieces[hit]

  # Keep only the text after the opener so nothing that precedes the list
  # can leak into the first word.
  opener_at <- regexpr(pattern = opener, text = piece, fixed = TRUE)
  list_start <- as.integer(opener_at) + attr(opener_at, "match.length")
  listing <- substr(piece, list_start, nchar(piece))

  tokens <- strsplit(x = listing, split = ",", fixed = TRUE)[[1]]
  words <- gsub(pattern = "[^A-Za-z0-9]", replacement = "", x = tokens)
  words <- words[nzchar(words)]  # drop tokens that held no letters or digits

  if (length(words) == 0L) {
    stop("No words found after '", opener, "'.", call. = FALSE)
  }
  words
}

# read in the raw JS that has the word dictionaries
raw_site <- read_html(wordle_js_url) %>%
  html_text()
if (length(raw_site) != 1L || is.na(raw_site)) {
  stop("Could not read any text from ", wordle_js_url, call. = FALSE)
}

# a kind of lazy way to zero in on the dictionaries: split the bundle into
# statements and keep the one that declares the word lists
split_site <- strsplit(x = raw_site, split = ";", fixed = TRUE)[[1]]
find_dictionaries <- grep(pattern = "var Aa=", x = split_site, fixed = TRUE)
if (length(find_dictionaries) != 1L) {
  stop(
    "Expected exactly one statement containing 'var Aa=' but found ",
    length(find_dictionaries),
    "; the layout of the Wordle bundle may have changed.",
    call. = FALSE
  )
}
dictionaries <- split_site[find_dictionaries]

# each array literal ends with "]", so this separates the individual lists
split_dictionaries <- strsplit(
  x = dictionaries, split = "]", fixed = TRUE
)[[1]]

#non goal words
non_goal_words <- extract_word_list(split_dictionaries, ",La=[")

#goal words
goal_words <- extract_word_list(split_dictionaries, "var Aa=[")
##########
#Analysis#
##########
# Two identical words would be at distance 0 and so get an infinite edge
# weight below; work on the distinct goal words only.
distinct_goal_words <- unique(goal_words)
if (length(distinct_goal_words) < 2L) {
  stop("Need at least two distinct goal words to build a network.",
       call. = FALSE)
}

# Pairwise string distance between goal words (stringdist's default method),
# with the words themselves requested as row/column names.
d_dist <- stringdistmatrix(
  distinct_goal_words, distinct_goal_words, useNames = TRUE
)

# Closer words get heavier edges; zero the diagonal so there are no
# self-loops (it is 1/0 = Inf before this).
d_weight <- 1 / d_dist
diag(d_weight) <- 0

# The weight matrix is symmetric, so build an undirected graph: one edge per
# word pair instead of two mirrored directed edges.
g <- graph_from_adjacency_matrix(
  d_weight, mode = "undirected", weighted = "weight"
)
deg <- strength(g)           # per-word sum of edge weights
cent <- eigen_centrality(g)

# Most central word; printed explicitly so it is shown under source() too.
best_start <- which.max(cent$vector)
print(best_start)

if (isTRUE(do_plot)) {
  # Edge widths must be per-edge values, so scale the edge weights (not the
  # per-vertex strengths) to a maximum of 1.
  edge_weight <- E(g)$weight
  png("wordle_net.png")
  tryCatch(
    plot(
      g,
      vertex.size = 1,
      vertex.label = NA,
      vertex.color = "#00000075",
      edge.arrow.mode = 0,
      edge.width = edge_weight / max(edge_weight),
      edge.color = "#00000075"
    ),
    finally = dev.off()  # always close the PNG device, even if plot() fails
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment