Last active
July 13, 2018 15:42
-
-
Save ptbrowne/05d8231bde5b6e963278e6ec451a2fbf to your computer and use it in GitHub Desktop.
alphabet.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
import sys | |
from copy import deepcopy | |
def find_common_prefix(word1, word2): | |
"""Find the longest common prefix between two words""" | |
i = 0 | |
l1 = len(word1) | |
l2 = len(word2) | |
while l1 > i and l2 > i and word1[i] == word2[i]: | |
i += 1 | |
return word1[0:i] | |
def test_find_common_prefix(): | |
assert find_common_prefix('ABB', 'AB') == 'AB' | |
assert find_common_prefix('A', 'B') == '' | |
assert find_common_prefix('ABCDE', 'ABCDEF') == 'ABCDE' | |
assert find_common_prefix('DEHIO', '') == '' | |
def learn_one(rules, word_before, word_after): | |
"""From two words, will learn a rule of precedence and add | |
it to the rule repository. | |
Also adds the letters from both words to the rules repository | |
(there may be some letters for which we do not have any precedence | |
knowledge but have at least the knowledge that they exist. | |
> rules = defaultdict(set) | |
> learn_one(rules, 'AB', 'AC') | |
> rules | |
{"A": set(), "C": {"B"}, "B": set() } | |
""" | |
if not word_before or not word_after: | |
return | |
common = find_common_prefix(word_before, word_after) | |
lcom = len(common) | |
before = word_before[lcom] if lcom < len(word_before) else None | |
after = word_after[lcom] if lcom < len(word_after) else None | |
if before and after and not after in rules[before]: | |
rules[after].add(before) | |
for l in word_before: | |
rules[l] # acknowledge the letter | |
for l in word_after: | |
rules[l] # acknowledge the letter | |
def test_learn(): | |
rules = defaultdict(set) | |
learn_one(rules, 'AB', 'AC') | |
learn_one(rules, 'B', 'D') | |
learn_one(rules, 'ABC', 'ABDE') | |
assert 'B' in rules['C'] | |
assert 'B' in rules['D'] | |
assert 'C' in rules['D'] | |
assert 'D' in rules | |
assert 'E' in rules | |
def learn_precedence_rules(words): | |
"""From a sorted list of words, learn the rules of precedence. | |
Output is a defaultdict where keys are letters ("A", "B") and | |
values are all the letters before the key. | |
For the latin alphabet : | |
A -> [] | |
B -> ["A"] | |
C -> ["B", "C"] | |
""" | |
rules = defaultdict(set) | |
word_before = None | |
l = len(words) + 0.0 | |
for word in words: | |
learn_one(rules, word_before, word) | |
word_before = word | |
return rules | |
def remove_letter(rules, letter): | |
"""Remove all occurences of a letter from the rules of precedence""" | |
del rules[letter] | |
for s in rules.values(): | |
if letter in s: | |
s.remove(letter) | |
def find_possible_order(rules): | |
"""From the rules of precedence, outputs a possible order""" | |
rules = deepcopy(rules) # going to mutate rules | |
result = [] | |
while len(rules): | |
min_len = min(len(s) for s in rules.values()) | |
for letter, letters_before in rules.items(): | |
if len(letters_before) == min_len: | |
result.append(letter) | |
remove_letter(rules, letter) | |
break | |
return "".join(result) | |
def test_find_possible_order(): | |
res = find_possible_order({ | |
"A": set(), | |
"B": set(['A']), | |
"C": set(['B']), | |
"D": set(['B', 'C']) | |
}) | |
assert res == 'ABCD' | |
def find_alphabet(words): | |
"""Given a list of words in lexicographic order, but formed from characters of an | |
unknown alphabet, returns the list of characters in the correct order: | |
the "alphabet" for these words.""" | |
rules = learn_precedence_rules(words) | |
return find_possible_order(rules) | |
def main(): | |
with open('/usr/share/dict/words') as f: | |
words = [l.strip().lower() for l in f.readlines()] | |
alphabet = find_alphabet(words) | |
print("".join(alphabet)) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment