Last active
January 26, 2022 23:56
-
-
Save shoma/17ec8135992613232fbe87d7c69bcdfa to your computer and use it in GitHub Desktop.
helper for wordle
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
import operator | |
from collections import OrderedDict | |
""" | |
https://www.powerlanguage.co.uk/wordle/ | |
""" | |
def check(word: str, contain: str, excludes: str) -> bool:
    """Tell whether *word* satisfies both letter constraints.

    Args:
        word: Candidate word to test.
        contain: Letters that must each occur somewhere in ``word``.
        excludes: Letters that must not occur anywhere in ``word``.

    Returns:
        ``True`` when every letter of ``contain`` is present in ``word`` and
        no letter of ``excludes`` is. An empty constraint string never
        rejects a word.
    """
    # Both constraints are evaluated unconditionally before combining them.
    missing_required = any(letter not in word for letter in contain)
    has_forbidden = any(letter in word for letter in excludes)
    return not missing_required and not has_forbidden
def search(contain: str, exclude: str, ptn, dictionary: str = "/usr/share/dict/words") -> [str]:
    """Collect five-letter dictionary words that satisfy the given constraints.

    Args:
        contain: Letters every result must include (passed to ``check``).
        exclude: Letters no result may include (passed to ``check``).
        ptn: Compiled regular expression whose ``match`` must succeed on the
            word, or ``None`` to skip the pattern test.
        dictionary: Path of a word list with one word per line. Defaults to
            the system word list.

    Returns:
        Lower-cased matching words in file order, each listed once.

    Raises:
        RuntimeError: If ``dictionary`` is not an existing file.
    """
    if not os.path.isfile(dictionary):
        raise RuntimeError('%s does not exist' % dictionary)

    matches = []
    seen = set()
    # errors='replace' keeps one undecodable byte from aborting the scan; any
    # word containing a replacement character is dropped by the a-z test below.
    with open(dictionary, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            word = line.strip().lower()
            if len(word) != 5 or word in seen:
                continue
            # Word lists contain entries such as "can't" or accented words;
            # only plain a-z words are kept.
            if not all('a' <= ch <= 'z' for ch in word):
                continue
            if not check(word, contain, exclude):
                continue
            if ptn is not None and not ptn.match(word):
                continue
            seen.add(word)
            matches.append(word)
    return matches
# Relative frequency (percent) of each letter, from
# https://www3.nd.edu/~busiforc/handouts/cryptography/letterfrequencies.html
# Built once at import time instead of on every calc_score() call.
_LETTER_FREQUENCIES = {
    "e": 11.1607,
    "a": 8.4966,
    "r": 7.5809,
    "i": 7.5448,
    "o": 7.1635,
    "t": 6.9509,
    "n": 6.6544,
    "s": 5.7351,
    "l": 5.4893,
    "c": 4.5388,
    "u": 3.6308,
    "d": 3.3844,
    "p": 3.1671,
    "m": 3.0129,
    "h": 3.0034,
    "g": 2.4705,
    "b": 2.0720,
    "f": 1.8121,
    "y": 1.7779,
    "w": 1.2899,
    "k": 1.1016,
    "v": 1.0074,
    "x": 0.2902,
    "z": 0.2722,
    "j": 0.1965,
    "q": 0.1962,
}


def calc_score(word: str) -> float:
    """Return the sum of the letter frequencies of every character in *word*.

    The word is lower-cased before lookup. Characters that are not in the
    frequency table (apostrophes, digits, accented letters, ...) add nothing
    to the score instead of raising ``KeyError``.

    Args:
        word: The word to score; repeated letters are counted each time.

    Returns:
        The accumulated frequency score, ``0.0`` for an empty word.
    """
    return sum(
        (_LETTER_FREQUENCIES.get(letter, 0.0) for letter in word.lower()),
        0.0,
    )
def filter_by_frequencies(words: [str], limit: int) -> [str]:
    """Pick up to *limit* words, highest ``calc_score`` first.

    Words whose letters are five distinct characters are listed first. The
    remaining words are appended after them only when the first group alone
    holds fewer than ``limit`` entries.

    Args:
        words: Candidate words; repeated entries are collapsed.
        limit: Maximum number of words to return.

    Returns:
        The selected words, each group ordered by descending score.
    """
    # Keyed by word, so a repeated word keeps its first position.
    scores = OrderedDict((word, calc_score(word)) for word in words)

    # Ascending stable sort followed by a reversal, so equal scores come out
    # in reverse insertion order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1])[::-1]

    distinct = [word for word, _ in ranked if len(set(word)) == 5]
    if len(distinct) < limit:
        # Not enough words with five distinct letters: fall back to the rest.
        distinct = distinct + [word for word, _ in ranked if len(set(word)) != 5]
    return distinct[:limit]
def main():
    """Run the interactive helper loop until Ctrl-C or end of input.

    Each round asks for the letters to contain, the letters to exclude and a
    regular-expression pattern, then prints up to 20 suggestions. The value
    shown in parentheses in each prompt is the current setting: pressing
    Enter keeps it, typing a new value replaces it.
    """
    contain = ""
    exclude = ""
    pattern = "....."
    # `raise` is a good word to start the game because made up of high frequency letters
    print('raise')
    try:
        while True:
            # Dictionary words are compared in lower case, so normalise the
            # letter sets; empty input falls back to the previous value.
            contain = input("contain letters (%s): " % contain).strip().lower() or contain
            exclude = input("exclude letters (%s): " % exclude).strip().lower() or exclude
            entered = input("pattern letters eg, ..i[^e]. (%s): " % pattern).strip() or pattern
            try:
                ptn = re.compile(entered)
            except re.error as err:
                # Keep the last working pattern and let the user try again.
                print("invalid pattern %r: %s" % (entered, err))
                continue
            pattern = entered
            found = search(contain, exclude, ptn)
            for w in filter_by_frequencies(found, 20):
                print(w)
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or Ctrl-D / closed stdin: leave quietly.
        pass


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment