Last active
June 16, 2019 12:44
-
-
Save cmartello/7a458e3c6e0a7e20b20b653c7bd9e01a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""This is a simple script I originally wrote for finding solutions in
the "wordscapes" program; the game presents you with a series of letters
that you unjumble in various manners to form words which are
then arranged on an empty crossword puzzle.
Currently, it'll search a supplied dictionary for possible words, but on
my TODO list is to add functionality to search for patterns and
lengths of words.
"""
# The upper-case letters A-Z: the keys of the per-letter word index built by
# load_dictionary and the letters that search tests a query against.
# NOTE: it's a module-level global for now; this will change in refactoring.
ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
def cleanup(base_string):
    """Return only the letters A-Z from ``base_string``, in upper case.

    Every character that is not an ASCII letter (digits, whitespace,
    punctuation, accented or other non-English letters) is discarded.

    Args:
        base_string: The text to filter.

    Returns:
        A string made up solely of the characters 'A' through 'Z'; empty
        if ``base_string`` contains no ASCII letters.
    """
    # Explicit range checks rather than str.isalpha(): isalpha() also accepts
    # non-ASCII letters, and some of those change when upper-cased into
    # several characters (e.g. 'ß' becomes 'SS'), which would break the
    # A-Z-only contract.
    return ''.join(
        ch for ch in base_string if 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'
    ).upper()
def load_dictionary(user):
    """Load a word list from a file and index it by letter.

    Each line of the file is passed through ``cleanup``; lines that are
    empty after cleaning (for example blank lines) are skipped.

    Args:
        user: Path of the word-list file, one word per line.

    Returns:
        A ``(words, allwords)`` tuple.  ``words`` is a dict mapping each
        letter in ``ALPHA`` to the set of words containing that letter, and
        ``allwords`` is the set of every word read.
    """
    words = {letter: set() for letter in ALPHA}
    allwords = set()

    # The context manager closes the file even if reading fails part-way.
    with open(user, 'r') as handle:
        for line in handle:
            word = cleanup(line)
            # Skip lines with nothing left after cleaning; otherwise the
            # empty string would be stored as a word.
            if not word:
                continue
            allwords.add(word)
            for ltr in ALPHA:
                if ltr in word:
                    words[ltr].add(word)

    return (words, allwords)
def search(letters, dictionary, length=0):
    """Find the words that can be spelled with the supplied letters.

    A word matches when it uses only letters from ``letters`` and uses no
    letter more often than it occurs in ``letters``.

    Args:
        letters: The available letters; passed through ``cleanup`` first.
        dictionary: A pair whose first item maps each letter in ``ALPHA``
            to the set of words containing it and whose second item is the
            set of all words.
        length: If greater than 0, only words of exactly this many letters
            are returned; otherwise every matching word is returned.

    Returns:
        A set of the matching words.
    """
    # filter out nonalphabetic characters
    letters = cleanup(letters)
    words_by_letter = dictionary[0]
    candidates = dictionary[1]

    # From the set of all words, drop every word that needs a letter the
    # query lacks: if the letters do not include "P", "PUKE" cannot match.
    for ltr in ALPHA:
        if ltr not in letters:
            candidates = candidates - words_by_letter[ltr]

    # Drop words that are longer than the query or that use some letter more
    # often than the query supplies it.  Example: for the query "LEAP",
    # "EEL" survives the previous step but needs two E's, so it is rejected.
    matches = {
        word for word in candidates
        if len(word) <= len(letters)
        and all(word.count(ltr) <= letters.count(ltr) for ltr in set(word))
    }

    # keep only words of exactly the requested length, if one was given
    if length > 0:
        return {word for word in matches if len(word) == length}
    return matches
def main_loop(dictionary_path='TWL06.txt'):
    """Run a simple interactive loop: enter query, get results.

    A query is a group of letters, optionally followed by a word length,
    for example ``LEAP`` or ``LEAP 3``.  The loop ends on end-of-input or
    on a keyboard interrupt.

    Args:
        dictionary_path: Word-list file to load; defaults to 'TWL06.txt'.
    """
    dictionary = load_dictionary(dictionary_path)
    while True:
        try:
            query = input('> ').split()
        except (EOFError, KeyboardInterrupt):
            # Leave quietly instead of ending the session with a traceback.
            print()
            break

        # An empty line has no letters to search for; just prompt again.
        if not query:
            continue

        if len(query) < 2:
            maxlen = 0
        else:
            try:
                maxlen = int(query[1])
            except ValueError:
                print('length must be a whole number: ' + query[1])
                continue

        # search returns a set; sort it so the output order is stable.
        for word in sorted(search(query[0].upper(), dictionary, maxlen)):
            print(word)
# Start the interactive prompt only when this file is run as a script.
if __name__ == '__main__':
    main_loop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment