Created
February 5, 2022 18:51
-
-
Save billdenney/80097e1c5a0fb5f80d30dd0efa89f6d5 to your computer and use it in GitHub Desktop.
Solve all words in wordle
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load all required libraries | |
library(tidyverse) | |
# Functions #### | |
#' Load the possible word lists from the Wordle source | |
#' | |
#' @param url The javascript source for finding the word lists | |
#' @param match_possible_words A word that is within the word list for the | |
#' possible words (mainly, words that people are likely to know, the Wordle | |
#' developer was kind) | |
#' @param match_all_words A word that is within the word list for all words | |
#' (more than just the possible words) | |
#' @param expected_characters The number of characters expected in every word, | |
#' used as a quality check for the outputs | |
#' @return A named list with names of "possible" and "all" for the possible | |
#' and all word lists | |
#' @export | |
load_wordle_word_lists <- function( | |
url="https://www.powerlanguage.co.uk/wordle/main.c1506a22.js", | |
match_possible_words="balmy", match_all_words="aahed", | |
expected_characters=5) { | |
word_source_raw <- readLines(url) | |
## There are two word lists in the source, one that is allowed words to be the | |
## correct answer, and the other that is all words allowed to be guessed | |
two_lists <- c(possible=match_possible_words, all=match_all_words) | |
two_lists_l <- list() | |
for (current_word_type in names(two_lists)) { | |
current_word <- two_lists[[current_word_type]] | |
mask <- grepl(x=word_source_raw, pattern=current_word) | |
word_source_words_raw <- | |
gsub( | |
x=word_source_raw[mask], | |
pattern=sprintf('^.*\\[([",a-z]*%s[",a-z]*)\\].*$', current_word), | |
replacement="\\1" | |
) | |
#word_source_words_raw <- gsub(x=word_source_raw[mask], pattern='^.*?\\[((?:\\"[a-z]{5}\\",?)+)\\].*?$', replacement="\\1") | |
two_lists_l[[current_word_type]] <- | |
gsub(x=strsplit(x=word_source_words_raw, split=",")[[1]], pattern='"', replacement="") | |
} | |
## All words that are allowed | |
words <- unname(unlist(two_lists_l)) | |
## All words that are possible | |
possible_words <- two_lists_l$balmy | |
# Make sure that loading the data worked correctly (all words should have 5 characters) | |
stopifnot(all(nchar(words) == expected_characters)) | |
words[nchar(words) != expected_characters] | |
two_lists_l | |
} | |
#' Generate a data.frame with all the letter/position for all words | |
#' | |
#' @param word The list of words (character vector) | |
#' @return A data.frame with rownames of \code{word} and the first column "word" | |
#' containing \code{word}. Then additional columns for each letter and | |
#' letter/position ("a", "a1", "a2", ...) with a boolean indicating if the | |
#' letter is in the word or the letter is in the word at the position. | |
#' @examples | |
#' add_word_attributes(c("binge", "splut")) | |
#' @export | |
wordle_has_letter_setup <- function(word) { | |
stopifnot(length(unique(nchar(word))) == 1) | |
ret <- data.frame(word=word) | |
for (current_letter in letters) { | |
ret[[current_letter]] <- grepl(x=word, pattern=current_letter, fixed=TRUE) | |
for (current_idx in seq_len(nchar(word[1]))) { | |
ret[[paste0(current_letter, current_idx)]] <- current_letter == substr(word, current_idx, current_idx) | |
} | |
} | |
rownames(ret) <- word | |
ret | |
} | |
#' Generate the "colors" for a word guess relative to the possible words | |
#' | |
#' @param guess The word that was guessed | |
#' @param possible The vector of possible words | |
#' @return An integer vector where 0 indicates no in the word, 1 indicates | |
#' elsewhere in the word, and 2 indicates at the current location in the word. | |
#' @export | |
wordle_guess_colors <- function(current_guess, wordle_has_letter_df) { | |
states <- rep(0, nrow(wordle_has_letter_df)) | |
for (idx in seq_len(nchar(current_guess))) { | |
current_letter <- substr(current_guess, idx, idx) | |
is_anywhere <- wordle_has_letter_df[, current_letter] | |
is_here <- wordle_has_letter_df[, paste0(current_letter, idx)] | |
states <- states*10 + (is_anywhere + is_here) | |
} | |
as.integer(states) | |
} | |
#' Generate a data.frame of "colors" where the first column are the possible | |
#' words to guess and the remaining columns are the colors generated by the | |
#' guessed word. | |
#' | |
#' @inheritParams wordle_guess_colors | |
wordle_guess_colors_df <- function(possible, all_words) { | |
wordle_has_letter_df <- wordle_has_letter_setup(possible) | |
all_words <- sort(unique(c(possible, all_words))) | |
ret <- data.frame(X=possible) | |
pb <- txtProgressBar(min=0, max=length(all_words), style=3) | |
for (current_all in all_words) { | |
setTxtProgressBar(pb, value=getTxtProgressBar(pb) + 1) | |
ret[[current_all]] <- | |
wordle_guess_colors( | |
current_guess=current_all, | |
wordle_has_letter_df=wordle_has_letter_df | |
) | |
} | |
ret | |
} | |
new_wordle_game <- function(correct_word, possible_df, expected_char=5) { | |
stopifnot(is.character(correct_word)) | |
stopifnot(length(correct_word) == 1) | |
stopifnot(nchar(correct_word) == expected_char) | |
list( | |
correct=correct_word, | |
possible_df=possible_df, | |
available_rows=rep(TRUE, nrow(possible_df)), | |
available_cols=c(FALSE, rep(TRUE, ncol(possible_df) - 1)) | |
) | |
} | |
wordle_game_state <- function(game_state, guess, verbose=TRUE) { | |
guess_value <- game_state$possible_df[game_state$possible_df$X == game_state$correct, guess] | |
history_new <- setNames(guess_value, guess) | |
game_state$history <- c(game_state$history, history_new) | |
game_state$available_rows <- game_state$available_rows & game_state$possible_df[, guess] == guess_value | |
# game_state$available_cols <- | |
# (names(game_state$available_cols) %in% game_state$possible_df$X) | |
if (verbose) { | |
message( | |
"Number of possible words: ", sum(game_state$available_rows), "\n", | |
"Number of possible guesses: ", sum(game_state$available_cols), "\n", | |
"History: ", paste(names(game_state$history), game_state$history, sep="=", collapse=", ") | |
) | |
} | |
game_state | |
} | |
wordle_choose_guess <- function(game_state, metric_fun, verbose=TRUE) { | |
possible_words <- game_state$possible_df[[1]][game_state$available_rows] | |
if (length(possible_words) < 3) { | |
# If there are two choices, guess | |
return(setNames(rep(-Inf, length(possible_words)), possible_words)) | |
} | |
if (!is.list(metric_fun)) { | |
metric_fun <- list(metric_fun) | |
} | |
if (verbose) { | |
pb <- txtProgressBar(min=0, max=sum(game_state$available_cols), style=3) | |
pb_metric_fun <- function(...) { | |
setTxtProgressBar(pb, value=getTxtProgressBar(pb) + 1) | |
metric_fun[[1]](...) | |
} | |
} else { | |
pb_metric_fun <- metric_fun[[1]] | |
} | |
metrics <- | |
sapply( | |
# operate only on the available rows from the available columns | |
X=game_state$possible_df[game_state$available_cols][game_state$available_rows, ], | |
FUN=pb_metric_fun | |
) | |
# First prioritize improved metrics. Then prioritize words that are possible | |
# (so that you can get it in 1 more). | |
ret_metrics <- metrics[order(metrics, -(names(metrics) %in% possible_words))] | |
# If there are more than one metric functions defined, break top-level ties with the next metric function | |
mask_equal_to_best <- ret_metrics == ret_metrics[[1]] | |
if ((length(metric_fun) > 1) & sum(mask_equal_to_best) > 1) { | |
metrics2 <- | |
sapply( | |
# operate only on the available rows from the available columns | |
X=game_state$possible_df[names(ret_metrics)][game_state$available_rows, ], | |
FUN=metric_fun[[2]] | |
) | |
ret <- ret_metrics[mask_equal_to_best][order(metrics2)] | |
} else { | |
ret <- ret_metrics | |
} | |
ret | |
} | |
wordle_choose_guess_cache <- function(metric_fun) { | |
cache <- force(list()) | |
metric_fun <- force(metric_fun) | |
function(game_state, verbose) { | |
history_chr <- paste(names(game_state$history), game_state$history, sep="=", collapse=", ") | |
if (history_chr %in% names(cache)) { | |
if (verbose) message("Using cache for ", history_chr, ": ", names(cache[[history_chr]])) | |
} else { | |
# Only store the first guess, since that is all that is used. | |
cache[[history_chr]] <<- wordle_choose_guess(game_state=game_state, metric_fun=metric_fun, verbose=verbose)[1] | |
} | |
cache[[history_chr]] | |
} | |
} | |
# Autoplay #### | |
wordle_autoplay <- function(game_state, guess_fun, initial_guess, verbose_guess=FALSE, verbose_state=FALSE) { | |
game_state <- wordle_game_state(game_state, guess=initial_guess, verbose=verbose_state) | |
while (game_state$history[length(game_state$history)] != 22222) { | |
current_guess <- guess_fun(game_state=game_state, verbose=verbose_guess) | |
game_state <- wordle_game_state(game_state, guess=names(current_guess)[1], verbose=verbose_state) | |
} | |
message( | |
"Game complete for ", game_state$correct, "; ", length(game_state$history), " guesses; ", | |
paste(names(game_state$history), game_state$history, sep="=", collapse=", ") | |
) | |
game_state | |
} | |
# Word selection metrics #### | |
wordle_metric_smallest_big_group <- function(x) { | |
max(summary(factor(x))) | |
} | |
wordle_metric_most_to_median_group <- function(x) { | |
sf <- rev(sort(summary(factor(x)))) | |
-unname(which(cumsum(sf) > (length(x)/2))[1]) | |
} | |
wordle_metric_smallest_median_group <- function(x) { | |
sf <- sort(summary(factor(x))) | |
unname(sf[which(cumsum(sf) > (length(x)/2))[1]]) | |
} | |
guess_cache_smallest_big_group <- wordle_choose_guess_cache(metric_fun=wordle_metric_smallest_big_group) | |
guess_cache_smallest_median_group <- wordle_choose_guess_cache(metric_fun=wordle_metric_smallest_median_group) | |
guess_cache_smallest_big_then_median_group <- wordle_choose_guess_cache(metric_fun=list(wordle_metric_smallest_big_group, wordle_metric_smallest_median_group)) | |
# Game Setup #### | |
words <- load_wordle_word_lists() | |
wordle_guesses_possible <- wordle_guess_colors_df(possible=words$possible, all_words=words$all) | |
# Start a game #### | |
# game_pleat <- new_wordle_game(correct_word="pleat", possible_df=wordle_guesses_possible) | |
# initial_guess_smallest_big_group <- wordle_choose_guess(game_state=game_pleat, metric_fun=wordle_metric_smallest_big_group) | |
# initial_guess_most_to_median_group <- wordle_choose_guess(game_state=game_pleat, metric_fun=wordle_metric_most_to_median_group) | |
# initial_guess_smallest_median_group <- wordle_choose_guess(game_state=game_pleat, metric_fun=wordle_metric_smallest_median_group) | |
pb_all_words <- txtProgressBar(min=0, max=nrow(wordle_guesses_possible), style=3) | |
ret <- list() | |
for (current_word in sort(wordle_guesses_possible$X)) { | |
setTxtProgressBar(pb_all_words, value=getTxtProgressBar(pb_all_words) + 1, title="All words") | |
ret[[current_word]] <- | |
wordle_autoplay( | |
game_state=new_wordle_game(correct_word=current_word, possible_df=wordle_guesses_possible), | |
guess_fun=guess_cache_smallest_big_group, | |
initial_guess="raise" | |
) | |
} | |
guess_distribution <- sapply(X=ret, FUN=function(x) length(x$history)) | |
ggplot2::ggplot(data=data.frame(distr=guess_distribution), aes(x=distr)) + | |
geom_histogram() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment