Created
April 16, 2013 20:08
-
-
Save davesque/5399178 to your computer and use it in GitHub Desktop.
My solution to the Thumbtack PyCon challenge 2013
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import itertools | |
import sys | |
from collections import defaultdict | |
from pyparsing import CharsNotIn, Group, Optional, Word, ZeroOrMore, alphanums | |
word = Word(alphanums) | |
space = CharsNotIn(alphanums) | |
doc = ZeroOrMore(Group(word + Optional(space))) | |
def find_anagram_groups(content): | |
content = content.lower() | |
sys.stderr.write('Parsing words...\n') | |
parsed = doc.parseString(content) | |
words = set(p[0] for p in parsed if not len(p[0]) < 4) | |
sys.stderr.write('Getting word combinations...\n') | |
combinations = itertools.combinations(words, 2) | |
sys.stderr.write('Grouping combinations by anagram (this might take a while)...\n') | |
anagrams = defaultdict(list) | |
for c in combinations: | |
key = ''.join(sorted(c[0] + c[1])) | |
ana_words = tuple(itertools.chain(*anagrams[key])) | |
if c[0] not in ana_words and c[1] not in ana_words: | |
anagrams[key].append(c) | |
sys.stderr.write('Sorting anagram groups by length...\n') | |
groups = sorted(anagrams.values(), key=lambda g: -len(g)) | |
if groups: | |
max_length = len(groups[0]) | |
max_length_groups = itertools.takewhile(lambda g: len(g) == max_length, groups) | |
return max_length_groups | |
else: | |
return [] | |
if __name__ == '__main__': | |
content = sys.stdin.read() | |
groups = find_anagram_groups(content) | |
for g in groups: | |
sys.stdout.write(', '.join(' '.join(p) for p in g) + '\n') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment