Last active
May 14, 2018 03:18
-
-
Save robspassky/d7c04e0531527b31d4c207ef402bbf39 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
ladder.py | |
words3.txt was created by the following: | |
$ egrep '^[a-z][a-z][a-z]$' /etc/dictionaries-common/words > words3.txt | |
algorithm: | |
1. populate a graph where the words are vertices, and there are edges | |
between the words that differ by one letter | |
2. do a BFS at the "start" word until you find the "end" word or it fails | |
populating the graph: | |
1. create an array/list of all the valid words | |
2. for each word, mask each letter in turn with an underscore and add an entry | |
to a dictionary, where the value of each entry is a set containing all | |
words from which that key was generated. all the members of each set are | |
one letter apart. | |
3. iterate through the dictionary and populate another dictionary where the | |
keys are the words, and the values are all the "adjacent" words given | |
according to the dictionary created in #2. | |
4. this new dictionary defines an undirected graph where the keys are the | |
vertices (the words) and the values are a set of edges for each vertex | |
(i.e., words which are one letter different) | |
complexity: | |
populating the graph involves 1) iterating through all n valid words, and for | |
each one a) adding one hash entry for each letter, each entry requiring | |
b) iteration through the word to calculate the hash; and then iterating through | |
the m*n keys in the hash to build the graph. | |
so that is O(n*m*m + mn) to create the graph | |
once the graph is created the BFS operation to find the word ladder will | |
potentially visit all n nodes, but more likely a subset consisting of the | |
connected component of which the start word is a part. | |
so that is O(n) | |
thus the dominant timing is for the graph construction, or O(n*m*(m+1)). | |
the n (number of valid words) will be the larger factor than the m (number | |
of letters in word). e.g., there were 586 3-letter words in the "dict" file, | |
and 4495 5-letter words. so the algorithm will take superlinear-ish time | |
with regard to the number of valid words. but not close to quadratic | |
""" | |
import queue | |
def load_words(file): | |
with open(file) as f: | |
return [line.strip() for line in f] | |
def make_key(word, index): | |
x = list(word) | |
x[index] = "_" | |
return "".join(x) | |
def make_keys(word): | |
return [make_key(word, i) for i in range(len(word))] | |
def pop_hash(h, word): | |
for key in make_keys(word): | |
if key in h: | |
h[key].add(word) | |
else: | |
h[key] = {word} | |
def add_edges(g, words): | |
"""add edges between all words in the set""" | |
for w in words: | |
x = words.copy() | |
x.remove(w) | |
if w in g: | |
g[w] = g[w].union(x) | |
else: | |
g[w] = x | |
def pop_graph(words): | |
h = {} | |
for w in words: | |
pop_hash(h, w) | |
g = {} | |
for k in h: | |
if len(h[k]) > 1: | |
add_edges(g, h[k]) | |
return g | |
def find_ladder(graph, start, end): | |
q = queue.Queue() | |
visited = {} | |
parent = {} | |
done = False | |
if start not in graph: | |
return [] | |
q.put(start) | |
while q.empty() is False and done is False: | |
word = q.get() | |
visited[word] = True | |
for w in graph[word]: | |
if w == end: | |
parent[w] = word | |
done = True | |
break | |
elif w not in visited: | |
parent[w] = word | |
q.put(w) | |
if done is True: | |
retval = [] | |
w = end | |
while w in parent and w != start: | |
retval.append(w) | |
w = parent[w] | |
retval.append(start) | |
retval.reverse() | |
return retval | |
else: | |
return [] | |
if __name__ == "__main__": | |
words = load_words('words3.txt') | |
graph = pop_graph(words) | |
ladder = find_ladder(graph, 'fig', 'dog') | |
print(str(ladder)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment