Last active
January 24, 2022 05:14
-
-
Save nvanderw/fc9f935ba7a428b2f2beaa385b343422 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For finding the best wordle starting word | |
import argparse | |
import itertools | |
import math | |
import random | |
from dataclasses import dataclass | |
from multiprocessing import Pool | |
from typing import Any | |
def read_lines(path):
    """Lazily yield every line of the text file at ``path``.

    Trailing whitespace (including the line terminator) is stripped from each
    line before it is yielded. Blank lines are yielded as empty strings. The
    file stays open until the generator is exhausted or closed.
    """
    with open(path, "r") as source:
        for raw_line in source:
            yield raw_line.rstrip()
def is_valid_wordle_word(word):
    """Return True when ``word`` has exactly five characters, all of them lowercase."""
    if len(word) != 5:
        return False
    for letter in word:
        # Digits, spaces and punctuation are not "lowercase", so they are rejected too.
        if not letter.islower():
            return False
    return True
def sample_with_replacement(list, num_samples):
    """Lazily yield ``num_samples`` randomly chosen elements of ``list``.

    Each draw picks a fresh random index, so the same element may be yielded
    more than once. Nothing is drawn until the generator is iterated.
    """
    # NOTE: the first parameter shadows the ``list`` builtin; the name is kept
    # because it is part of the function's signature.
    yield from (
        list[random.randrange(0, len(list))]
        for _ in range(num_samples)
    )
def filter_from_answer(answer, guess, words):
    """Return the entries of ``words`` still possible after guessing ``guess``.

    Each letter of ``guess`` is compared against ``answer`` and narrows the
    candidate list in turn:

    * same letter at the same position in ``answer``: keep words with that
      letter at that position;
    * letter occurs elsewhere in ``answer``: keep words that contain the
      letter, but not at this position;
    * letter absent from ``answer``: keep words that do not contain it.

    Every guess letter is judged on its own, so repeated letters are not
    counted against how many times the letter occurs in ``answer``.

    If ``guess`` is empty, ``words`` itself is returned unchanged.
    """
    survivors = words
    for position, letter in enumerate(guess):
        exact_hit = answer[position] == letter
        elsewhere = (not exact_hit) and letter in answer

        narrowed = []
        for word in survivors:
            if exact_hit:
                keep = word[position] == letter
            elif elsewhere:
                # Position check first, mirroring the short-circuit order of the filter.
                keep = word[position] != letter and letter in word
            else:
                keep = letter not in word
            if keep:
                narrowed.append(word)
        survivors = narrowed
    return survivors
def estimate_entropy(context, candidate_word):
    """Estimate the average entropy (in bits) gained by guessing ``candidate_word``.

    For every answer in ``context.samples`` the full ``context.wordle_words``
    list is narrowed with ``filter_from_answer`` as if ``candidate_word`` had
    been guessed. The gain for that answer is ``log2`` of the list size before
    the guess minus ``log2`` of the size after it.

    Args:
        context: Object exposing ``wordle_words`` (all candidate words) and
            ``samples`` (the answers to average over).
        candidate_word: The guess being scored.

    Returns:
        A ``(candidate_word, mean_gain)`` tuple. ``mean_gain`` is ``0.0`` when
        there are no samples to average over.

    Raises:
        ValueError: If filtering leaves no words for some sample, since
            ``log2(0)`` is undefined. An answer that is itself in
            ``context.wordle_words`` always survives its own filter.
    """
    num_samples = len(context.samples)
    if num_samples == 0:
        # Nothing to average; previously this path divided by zero.
        return (candidate_word, 0.0)

    entropy_before = math.log2(len(context.wordle_words))
    total_gain = 0
    for sample in context.samples:
        remaining = filter_from_answer(sample, candidate_word, context.wordle_words)
        total_gain += entropy_before - math.log2(len(remaining))
    return (candidate_word, total_gain / num_samples)
# NOTE(review): no function in this file reads this module-level list, and the
# script section assigns its own `results` -- confirm whether it is still needed.
results = []


# The main reason to put all this in the context object is so that multiprocessing can
# serialize/send it to the child processes, so that they can pass it along to estimate_entropy.
# Passing pool.map a function closed over these values was tried first and failed, because
# such functions cannot be serialized.
@dataclass
class Context:
    # All candidate words; estimate_entropy takes its length and filters it.
    wordle_words: Any
    # The answers estimate_entropy iterates over when averaging.
    samples: Any
if __name__ == "__main__":
    # Script entry point: score every dictionary word with estimate_entropy in a
    # pool of worker processes, then print the highest-scoring words as CSV.
    parser = argparse.ArgumentParser(description="Find the best wordle starting words")
    parser.add_argument(
        '-p',
        '--parallel',
        metavar="N",
        type=int,
        default=None,
        help="Number of worker processes to spawn. Defaults to OS cores detected.")
    parser.add_argument(
        '-d',
        '--dictionary',
        metavar="path",
        type=str,
        required=True,
        help="Path to dictionary (required).")
    parser.add_argument(
        '-s',
        '--numsamples',
        metavar="N",
        type=int,
        default=100,
        help="Number of random samples to test each word against. More samples=more accuracy/more compute.")
    parser.add_argument(
        '-w',
        '--numwords',
        metavar="N",
        type=int,
        default=10,
        help="Show top N words.")
    args = parser.parse_args()

    if args.numsamples < 1:
        # With no samples there is nothing to average the entropy gain over.
        parser.error("--numsamples must be at least 1")

    wordle_words = [word for word in read_lines(args.dictionary) if is_valid_wordle_word(word)]
    if not wordle_words:
        # Fail with a clear message rather than an opaque error from sampling an empty list.
        parser.error(f"no valid five-letter lowercase words found in {args.dictionary}")

    # Basic algorithm: for each wordle word, estimate the average amount of entropy gained by picking it.
    samples = list(sample_with_replacement(wordle_words, args.numsamples))
    context = Context(wordle_words, samples)

    # Pool(None) lets multiprocessing choose the number of workers.
    with Pool(args.parallel) as pool:
        # Every task receives the same context paired with one candidate word;
        # zip stops when the word list is exhausted.
        results = pool.starmap(
            estimate_entropy,
            zip(itertools.repeat(context), wordle_words))

    # Highest average gain first; ties keep dictionary order because the sort is stable.
    results.sort(key=lambda scored: scored[1], reverse=True)

    print("word,average entropy")
    for word, average_gain in results[0:args.numwords]:
        print(f"{word},{average_gain}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example invocation: