Created
April 9, 2022 21:32
-
-
Save Radcliffe/7cc34028db9fe6d1643bf0e893dca42e to your computer and use it in GitHub Desktop.
Python script to determine the optimal starting guess for Wordle using Shannon information entropy.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This Python script calculates the Shannon information entropy
# for each initial guess in Wordle. Words with high entropy are
# good starting guesses for Wordle.
import numpy as np | |
import requests | |
from collections import Counter | |
def get_data():
    """
    Download the Wordle word list and split it into guesses and targets.

    The list is fetched from a pastebin site as tab-separated text. The
    first line and the final element of the newline split are discarded
    (presumably a header row and the empty string left by a trailing
    newline -- TODO confirm against the paste), and the third tab-separated
    field of each remaining line is taken as the word.

    Returns:
        A ``(words, targets)`` tuple. ``words`` is the list of all
        five-letter words allowed as guesses, in file order. ``targets`` is
        the shorter list of common words from which the secret word of the
        day is selected: every entry of ``words`` that precedes 'aahed'.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status.
        IndexError: If a line has fewer than three tab-separated fields.
        ValueError: If 'aahed' does not occur in the downloaded list.
    """
    url = 'https://paste.ee/d/4zigF/0'
    # A timeout keeps the script from hanging forever on a dead connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as word data.
    response.raise_for_status()
    lines = response.text.split('\n')[1:-1]
    words = [line.split('\t')[2] for line in lines]
    # 'aahed' marks where the common-word section of the list ends.
    targets = words[:words.index('aahed')]
    return words, targets
def get_hints(guess, target):
    """
    Compute the Wordle feedback for a guess against a secret (target) word.

    The feedback is packed into a base-10 integer with one digit per letter
    of the target, leftmost letter in the most significant digit (so values
    range from 00000 to 22222 for five-letter words):

    * 0 -- the letter is not in the target (or all its occurrences are
      already accounted for by other letters of the guess),
    * 1 -- the letter is in the target but in the wrong position,
    * 2 -- the letter is in the target and in the correct position.

    Both words are compared case-insensitively.
    """
    guess_word = guess.lower()
    secret = target.lower()
    size = len(secret)
    # Decimal place value contributed by each position of the guess.
    place_values = [10 ** (size - pos - 1) for pos in range(size)]

    # First pass: exact-position matches. Those target letters are consumed
    # up front so they cannot also satisfy a misplaced letter later.
    exact = [guess_word[pos] == secret[pos] for pos in range(size)]
    consumed = list(exact)
    code = 2 * sum(value for value, hit in zip(place_values, exact) if hit)

    # Second pass: each remaining guess letter claims the leftmost target
    # letter that is equal to it and has not been consumed yet.
    for pos, value in enumerate(place_values):
        if exact[pos]:
            continue
        letter = guess_word[pos]
        for other, candidate in enumerate(secret):
            if candidate == letter and not consumed[other]:
                consumed[other] = True
                code += value
                break
    return code
def entropy(guess, targets):
    """
    Return the Shannon information entropy, in bits, of a guess.

    Every word in ``targets`` is scored against ``guess`` with
    ``get_hints``; the entropy is taken over the resulting distribution of
    hint patterns, each pattern weighted by the share of targets that
    produce it.
    """
    # Tally how many targets yield each distinct hint pattern.
    pattern_counts = Counter()
    for target in targets:
        pattern_counts[get_hints(guess, target)] += 1

    counts = np.array(list(pattern_counts.values()))
    probabilities = counts / np.sum(counts)
    # Only observed patterns are present, so every probability is non-zero
    # and log2 is always defined.
    return -np.sum(probabilities * np.log2(probabilities))
def main():
    """
    Print the highest-entropy opening guess for two candidate pools.

    Entropy is always measured against the common target words; only the
    pool of candidate guesses changes between the two reports. Each report
    prints the entropy followed by the word. Ties in entropy are broken by
    the tuple comparison, i.e. the alphabetically later word wins.
    """
    words, targets = get_data()
    reports = (
        ("Most informative guess - common words only:", targets),
        ("Most informative guess - all words:", words),
    )
    for heading, candidates in reports:
        print(heading)
        best = max((entropy(guess, targets), guess) for guess in candidates)
        print(*best)
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment