Last active
August 29, 2015 14:04
-
-
Save damzam/499e1eec2a84e558140d to your computer and use it in GitHub Desktop.
solve a word jumble
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
A script that generates all possible words that can be formed from | |
the characters in a provided string. | |
Matches are case-insensitive and the wordlist used is obtained from: | |
https://raw.githubusercontent.com/mattt/mobius/master/data/english-wordlist | |
Usage: | |
./jumble_it_up.py <string> | |
* Single-letter words will not be included in the output | |
** If, in the case that the number of occurrences of a single character | |
in a word exceeds the number of that character in the string, it will | |
not be included in the output | |
""" | |
import argparse | |
from collections import defaultdict | |
from urllib import urlopen | |
WORDLIST_URL = 'https://raw.githubusercontent.com/mattt/mobius/master/data/english-wordlist' | |
def generate_wordlist(string, wordlist): | |
""" | |
Return a list of all valid English words that can be generated | |
from the characters in parameter string (see the __doc__ above) | |
""" | |
def _get_char_dict(chars): | |
""" | |
A utility function that creates a dictionary with the | |
respective counts of each character in a string | |
""" | |
char_dict = defaultdict(int) | |
for char in chars: | |
char_dict[char] += 1 | |
return char_dict | |
# We want the characters and respective counts of the string provided | |
string_dict = _get_char_dict(string) | |
matches = [] | |
# Iterate through the words and see if can create them from the characters in string | |
for word in wordlist: | |
if len(word) > len(string): | |
continue # It's not a match if the word's longer than string | |
# We also want the characters and respective counts of the word | |
word_dict = _get_char_dict(word) | |
# We're using the variable 'possible match' to break out of a nested loop | |
# without raising exceptions (which are really slow). The other option | |
# would be to use enumerate, but that would also be significantly slower. | |
possible_match = True | |
while possible_match: | |
for char in word_dict.iterkeys(): | |
if char not in string_dict or word_dict[char] > string_dict[char]: | |
possible_match = False | |
break | |
if possible_match and len(word) != 1: # We're not returning single characters | |
matches.append(word) | |
break | |
return matches | |
def main(): | |
parser = argparse.ArgumentParser(description=__doc__) | |
parser.add_argument('string', help='available characters') | |
p = parser.parse_args() | |
wordlist = urlopen(WORDLIST_URL).read().lower().split() | |
for match in generate_wordlist(p.string.lower(), wordlist): | |
print match | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment