Last active
June 10, 2020 17:09
-
-
Save harrytallbelt/cd092daf0f36cfb1b4218361bdee7793 to your computer and use it in GitHub Desktop.
Finds all anagrams for given words (Python 3).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import itertools | |
# Tested with dictionary from github.com/dwyl/english-words | |
WORDS_DICT_FILENAME = 'words_dictionary.json' | |
__dict = None | |
def get_valid_words(): | |
global __dict | |
if __dict: | |
return __dict | |
with open(WORDS_DICT_FILENAME, 'r', encoding='utf-8') as dict_file: | |
__dict = eval(dict_file.read()) | |
return __dict | |
def character_occurances(word): | |
res = {} | |
for c in word: | |
res[c] = res[c] + 1 if c in res else 1 | |
return res | |
def are_anagrams(word1, word2): | |
if len(word1) != len(word2): | |
return False | |
return character_occurances(word1) == character_occurances(word2) | |
def gen_anagrams(word, valid_words): | |
word = word.lower() | |
permutation_number = math.factorial(len(word)) | |
# If there are more possible character permutations of the input word, | |
# than there are known valid words, then it is faster to simply check | |
# every valid word for whether it's the input word's anagram. | |
if permutation_number > len(valid_words): | |
for dict_word in valid_words: | |
if are_anagrams(dict_word, word): | |
yield dict_word | |
return | |
# The word can have several occurances of one character, which means | |
# some of its character permutations can be equal to others. (Swapping 'o's | |
# in 'cowboy' doesn't change the word, but it is a valid permutation.) | |
# We do not filter those non-unique permutations out, as it would require | |
# storing all of them, which can take a lot of memory (factorial of | |
# the word's length). Instead, we use a set to filter out repeated anagrams. | |
anagrams = set() | |
permutations = map(''.join, itertools.permutations(word)) | |
for possible_anagram in permutations: | |
if possible_anagram in valid_words and not possible_anagram in anagrams: | |
anagrams.add(possible_anagram) | |
yield possible_anagram | |
if __name__ == '__main__': | |
import sys | |
words = sys.argv[1:] | |
if not words: | |
print('No words given in command line arguments.', end='\n\n') | |
exit() | |
print('Initializing word list...', end='\n\n') | |
valid_words = get_valid_words() | |
for word in words: | |
word = word.lower() | |
perm_number = math.factorial(len(word)) | |
print(f'Anagrams for "{word}" (out of {perm_number} options):') | |
for anagram in gen_anagrams(word, valid_words): | |
print('-', anagram) | |
print() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment