shoma · January 26, 2022 23:56
diff --git a/wordle.py b/wordle.py
 import re
 import os
 import operator
 from collections import OrderedDict

 """
 https://www.powerlanguage.co.uk/wordle/
 """


 def check(word: str, contain: str, excludes: str) -> bool:
    """Tell whether ``word`` satisfies the letter constraints.

    Returns True when every character of ``contain`` occurs in ``word`` and
    no character of ``excludes`` does. An empty constraint string always
    passes.
    """
    has_all_required = all(ch in word for ch in contain)
    has_any_excluded = any(ch in word for ch in excludes)
    return has_all_required and not has_any_excluded


 def search(contain: str, exclude: str, ptn) -> [str]:
    """Collect the five-letter dictionary words that satisfy the constraints.

    Reads ``/usr/share/dict/words``, lower-cases every entry and keeps the
    ones that are exactly five a-z letters, pass
    ``check(word, contain, exclude)`` and, when ``ptn`` is not None, match
    ``ptn`` from the start of the word (``ptn.match``).

    Returns:
        The matching words in file order, each word once.

    Raises:
        RuntimeError: if the dictionary file does not exist.
    """
    dictionary = "/usr/share/dict/words"
    if not os.path.isfile(dictionary):
        raise RuntimeError('/usr/share/dict/words is not exists')

    rtn = []
    seen = set()
    # errors='replace' keeps one undecodable byte from aborting the scan;
    # entries containing the replacement character fail the a-z test below.
    with open(dictionary, 'r', errors='replace') as f:
        for line in f:
            word = line.strip().lower()
            # Lower-casing can fold two entries ("China"/"china") together.
            if len(word) != 5 or word in seen:
                continue
            # Entries such as "amy's" are five characters long but are not
            # made of a-z only, which is all calc_score's table covers.
            if not all('a' <= ch <= 'z' for ch in word):
                continue
            if not check(word, contain, exclude):
                continue
            if ptn is not None and not ptn.match(word):
                continue
            seen.add(word)
            rtn.append(word)
    return rtn


 def calc_score(word) -> float:
    """Score ``word`` by summing the frequency weight of each of its letters.

    Weights come from the letter-frequency table at
    https://www3.nd.edu/~busiforc/handouts/cryptography/letterfrequencies.html
    Every occurrence is counted, so a repeated letter is added once per
    occurrence. Upper-case letters are scored like their lower-case form,
    and characters that are not in the table (apostrophes, digits, accented
    letters, ...) contribute 0.0 instead of raising ``KeyError``.

    Returns:
        The summed weight; 0.0 for an empty word.
    """
    letterfrequencies = {
        "a": 8.4966,
        "b": 2.0720,
        "c": 4.5388,
        "d": 3.3844,
        "e": 11.1607,
        "f": 1.8121,
        "g": 2.4705,
        "h": 3.0034,
        "i": 7.5448,
        "j": 0.1965,
        "k": 1.1016,
        "l": 5.4893,
        "m": 3.0129,
        "n": 6.6544,
        "o": 7.1635,
        "p": 3.1671,
        "q": 0.1962,
        "r": 7.5809,
        "s": 5.7351,
        "t": 6.9509,
        "u": 3.6308,
        "v": 1.0074,
        "w": 1.2899,
        "x": 0.2902,
        "y": 1.7779,
        "z": 0.2722,
    }
    score = 0.0
    # Plain left-to-right accumulation keeps the float result for a-z words
    # identical to what it has always been.
    for letter in word:
        score += letterfrequencies.get(letter.lower(), 0.0)
    return score


 def filter_by_frequencies(words: [str], limit: int) -> [str]:
    """Return up to ``limit`` of ``words``, highest ``calc_score`` first.

    Repeated entries in ``words`` are collapsed. Words whose letters are all
    distinct come first; words that repeat a letter are appended after them
    only when fewer than ``limit`` distinct-letter words are available.
    Within each group the order is descending score, and words with equal
    scores appear in reverse input order.
    """
    with_score = OrderedDict()
    for w in words:
        with_score[w] = calc_score(w)

    # Stable ascending sort followed by a reversal: this is what puts equal
    # scores in reverse input order, so do not swap it for reverse=True.
    ordered = sorted(with_score.items(), key=operator.itemgetter(1))
    ordered.reverse()

    distinct = []
    hasdup = []
    for entry in ordered:
        word = entry[0]
        # Compare with the word's own length rather than a hard-coded 5 so
        # the repeated-letter test is correct for any word length.
        if len(set(word)) != len(word):
            hasdup.append(entry)
        else:
            distinct.append(entry)

    if len(distinct) < limit:
        distinct = distinct + hasdup

    return [entry[0] for entry in distinct[:limit]]


 def main():
    """Run the interactive Wordle helper until Ctrl-C or end of input.

    Prints the opening suggestion ``raise``, then repeatedly asks for the
    letters the answer contains, the letters it excludes and a regular
    expression for the known positions, and prints up to 20 candidates
    returned by ``filter_by_frequencies``.

    Each prompt shows the current value in parentheses; an empty answer
    keeps that value. Letter answers are lower-cased because ``search``
    compares against lower-cased dictionary words. An invalid regular
    expression is reported and the previous pattern is kept.
    """
    contain = ""
    exclude = ""
    pattern = "....."

    # `raise` is a good opening word because it is made up of
    # high-frequency letters
    print('raise')

    try:
        while True:
            contain = input("contain letters (%s): " % contain).strip().lower() or contain
            exclude = input("exclude letters (%s): " % exclude).strip().lower() or exclude
            candidate = input("pattern letters eg, ..i[^e]. (%s): " % pattern).strip() or pattern

            try:
                ptn = re.compile(candidate)
            except re.error as err:
                # Do not store the bad pattern, otherwise pressing Enter on
                # the next round would fail again.
                print("invalid pattern %r: %s" % (candidate, err))
                continue
            pattern = candidate

            found = search(contain, exclude, ptn)
            rtn = filter_by_frequencies(found, 20)
            for w in rtn:
                print(w)
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or Ctrl-D ends the session quietly.
        pass


 if __name__ == '__main__':
    main()
	import re
	import os
	import operator
	from collections import OrderedDict

	"""
	https://www.powerlanguage.co.uk/wordle/
	"""


	def check(word: str, contain: str, excludes: str) -> bool:
	    """Tell whether ``word`` satisfies the letter constraints.

	    Returns True when every character of ``contain`` occurs in ``word`` and
	    no character of ``excludes`` does. An empty constraint string always
	    passes.
	    """
	    has_all_required = all(ch in word for ch in contain)
	    has_any_excluded = any(ch in word for ch in excludes)
	    return has_all_required and not has_any_excluded


	def search(contain: str, exclude: str, ptn) -> [str]:
	    """Collect the five-letter dictionary words that satisfy the constraints.

	    Reads ``/usr/share/dict/words``, lower-cases every entry and keeps the
	    ones that are exactly five a-z letters, pass
	    ``check(word, contain, exclude)`` and, when ``ptn`` is not None, match
	    ``ptn`` from the start of the word (``ptn.match``).

	    Returns:
	        The matching words in file order, each word once.

	    Raises:
	        RuntimeError: if the dictionary file does not exist.
	    """
	    dictionary = "/usr/share/dict/words"
	    if not os.path.isfile(dictionary):
	        raise RuntimeError('/usr/share/dict/words is not exists')

	    rtn = []
	    seen = set()
	    # errors='replace' keeps one undecodable byte from aborting the scan;
	    # entries containing the replacement character fail the a-z test below.
	    with open(dictionary, 'r', errors='replace') as f:
	        for line in f:
	            word = line.strip().lower()
	            # Lower-casing can fold two entries ("China"/"china") together.
	            if len(word) != 5 or word in seen:
	                continue
	            # Entries such as "amy's" are five characters long but are not
	            # made of a-z only, which is all calc_score's table covers.
	            if not all('a' <= ch <= 'z' for ch in word):
	                continue
	            if not check(word, contain, exclude):
	                continue
	            if ptn is not None and not ptn.match(word):
	                continue
	            seen.add(word)
	            rtn.append(word)
	    return rtn


	def calc_score(word) -> float:
	    """Score ``word`` by summing the frequency weight of each of its letters.

	    Weights come from the letter-frequency table at
	    https://www3.nd.edu/~busiforc/handouts/cryptography/letterfrequencies.html
	    Every occurrence is counted, so a repeated letter is added once per
	    occurrence. Upper-case letters are scored like their lower-case form,
	    and characters that are not in the table (apostrophes, digits, accented
	    letters, ...) contribute 0.0 instead of raising ``KeyError``.

	    Returns:
	        The summed weight; 0.0 for an empty word.
	    """
	    letterfrequencies = {
	        "a": 8.4966,
	        "b": 2.0720,
	        "c": 4.5388,
	        "d": 3.3844,
	        "e": 11.1607,
	        "f": 1.8121,
	        "g": 2.4705,
	        "h": 3.0034,
	        "i": 7.5448,
	        "j": 0.1965,
	        "k": 1.1016,
	        "l": 5.4893,
	        "m": 3.0129,
	        "n": 6.6544,
	        "o": 7.1635,
	        "p": 3.1671,
	        "q": 0.1962,
	        "r": 7.5809,
	        "s": 5.7351,
	        "t": 6.9509,
	        "u": 3.6308,
	        "v": 1.0074,
	        "w": 1.2899,
	        "x": 0.2902,
	        "y": 1.7779,
	        "z": 0.2722,
	    }
	    score = 0.0
	    # Plain left-to-right accumulation keeps the float result for a-z words
	    # the same as a simple running sum.
	    for letter in word:
	        score += letterfrequencies.get(letter.lower(), 0.0)
	    return score


	def filter_by_frequencies(words: [str], limit: int) -> [str]:
	    """Return up to ``limit`` of ``words``, highest ``calc_score`` first.

	    Repeated entries in ``words`` are collapsed. Words whose letters are all
	    distinct come first; words that repeat a letter are appended after them
	    only when fewer than ``limit`` distinct-letter words are available.
	    Within each group the order is descending score, and words with equal
	    scores appear in reverse input order.
	    """
	    with_score = OrderedDict()
	    for w in words:
	        with_score[w] = calc_score(w)

	    # Stable ascending sort followed by a reversal: this is what puts equal
	    # scores in reverse input order, so do not swap it for reverse=True.
	    ordered = sorted(with_score.items(), key=operator.itemgetter(1))
	    ordered.reverse()

	    distinct = []
	    hasdup = []
	    for entry in ordered:
	        word = entry[0]
	        # Compare with the word's own length rather than a hard-coded 5 so
	        # the repeated-letter test is correct for any word length.
	        if len(set(word)) != len(word):
	            hasdup.append(entry)
	        else:
	            distinct.append(entry)

	    if len(distinct) < limit:
	        distinct = distinct + hasdup

	    return [entry[0] for entry in distinct[:limit]]


	def main():
	    """Run the interactive Wordle helper until Ctrl-C or end of input.

	    Prints the opening suggestion ``raise``, then repeatedly asks for the
	    letters the answer contains, the letters it excludes and a regular
	    expression for the known positions, and prints up to 20 candidates
	    returned by ``filter_by_frequencies``.

	    Each prompt shows the current value in parentheses; an empty answer
	    keeps that value. Letter answers are lower-cased because ``search``
	    compares against lower-cased dictionary words. An invalid regular
	    expression is reported and the previous pattern is kept.
	    """
	    contain = ""
	    exclude = ""
	    pattern = "....."

	    # `raise` is a good opening word because it is made up of
	    # high-frequency letters
	    print('raise')

	    try:
	        while True:
	            contain = input("contain letters (%s): " % contain).strip().lower() or contain
	            exclude = input("exclude letters (%s): " % exclude).strip().lower() or exclude
	            candidate = input("pattern letters eg, ..i[^e]. (%s): " % pattern).strip() or pattern

	            try:
	                ptn = re.compile(candidate)
	            except re.error as err:
	                # Do not store the bad pattern, otherwise pressing Enter on
	                # the next round would fail again.
	                print("invalid pattern %r: %s" % (candidate, err))
	                continue
	            pattern = candidate

	            found = search(contain, exclude, ptn)
	            rtn = filter_by_frequencies(found, 20)
	            for w in rtn:
	                print(w)
	    except (KeyboardInterrupt, EOFError):
	        # Ctrl-C or Ctrl-D ends the session quietly.
	        pass


	if __name__ == '__main__':
	    main()