Created
October 9, 2012 22:52
-
-
Save matchu/3861977 to your computer and use it in GitHub Desktop.
Linear-time anagram detector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
# The first 26 primes (corresponding to A-Z) | |
ALPHABET_PRIMES = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101] | |
def get_letter_prime(letter): | |
"""Given a letter A-Z (equivalent to a-z), return a unique prime number | |
corresponding to that letter. Given a whitespace character, return 0. Given | |
anything else, raise an error.""" | |
if letter in string.whitespace: | |
return 0 # whitespace characters are valueless | |
letter_ord = ord(letter.upper()) | |
if letter_ord < ord("A") or letter_ord > ord("Z"): | |
raise ValueError("letter must be alphabetical or space") | |
index = ord(letter.upper()) - ord("A") | |
return ALPHABET_PRIMES[index] | |
def get_prime_letter_sum(letters): | |
"""Return a unique sum corresponding to the number of occurrences of | |
each letter A-Z (equivalent to a-z) in the given input. Whitespace | |
characters are ignored, and non-whitespace, non-alphabetical characters | |
raise an error.""" | |
return reduce(lambda t, l: t + get_letter_prime(l), letters, 0) | |
def is_anagram_primality(a, b): | |
"""Returns whether or not a and b are anagrams. All whitespace | |
characters are ignored, and non-whitespace, non-alphabetical | |
characters raise an error.""" | |
return get_prime_letter_sum(a) == get_prime_letter_sum(b) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Has a few restrictions, sure, but it's pretty nice. Could be extended to all of ASCII if we really cared enough.