Last active
May 14, 2019 19:51
-
-
Save PaulReiber/ed444f492a97764c6c76ef357e795ab9 to your computer and use it in GitHub Desktop.
Anagram scanner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ anagrammer - leveraging collections.Counter and pythons base classes | |
| Author: Paul Reiber reiber@gmail.com | |
| License: CC BY-SA 4.0 | |
| input is a dictionary - a list of words with each word listed exactly once. | |
| output is every anagram group that contains at least as many words as its words have letters. | |
| output is limited to anagram groups of 4 or more words. | |
| if you want to scan a text file with duplicates check out groupwords.py as a better example. | |
| """ | |
| import sys | |
| import re | |
| from collections import Counter | |
class Collector:  # pylint: disable=too-few-public-methods
    """Group words that are anagrams of one another.

    Instantiating the class files ``term`` into the class-level ``data``
    mapping; the instance itself stores nothing.
    """

    # Maps a letter-count signature to the list of words that share it,
    # in the order they were registered. Shared by every instance.
    data = {}

    def __init__(self, term):
        """Append ``term`` to the bucket matching its letter counts."""
        # Words are anagrams exactly when their (character, count) pairs
        # coincide; a frozenset of those pairs is hashable and unordered.
        signature = frozenset(Counter(term).items())
        Collector.data.setdefault(signature, []).append(term)
if __name__ == '__main__':
    # A word is any maximal run of non-whitespace characters.
    WORDS = re.compile(r'(\S+)')

    # Pass 1: register every word read from standard input.
    for text in sys.stdin:
        for match in WORDS.finditer(text):
            Collector(match.group(1))

    # Pass 2: report each group that has at least 4 members and at least
    # as many members as its first word has characters.
    for group in Collector.data.values():
        size = len(group)
        if size < 4 or size < len(group[0]):
            continue
        print(', '.join(group))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment