"""
ladder.py
words3.txt was created by the following:
$ egrep '^[a-z][a-z][a-z]$' /etc/dictionaries-common/words > words3.txt
algorithm:
1. populate a graph where the words are vertices, and there are edges
between the words that differ by one letter
2. do a BFS from the "start" word until you find the "end" word or the search fails
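e.g., for start="fig" and end="dog", one possible ladder is
fig -> fog -> dog (each step changes exactly one letter)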
populating the graph:
1. create an array/list of all the valid words
2. for each word, mask each letter in turn with an underscore and add an entry
to a dictionary, where the value of each entry is a set containing all
words from which that key was generated. all the members of each set are
one letter apart.
3. iterate through the dictionary and populate another dictionary where the
keys are the words, and the values are the sets of "adjacent" words, as
given by the dictionary created in #2. an illustration follows this list.
4. this new dictionary defines an undirected graph where the keys are the
vertices (the words) and the values are the set of neighbors for each vertex
(i.e., words which are one letter different)
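for illustration: "dog" yields the keys _og, d_g, do_ and "dig" yields the
keys _ig, d_g, di_; both words land in the d_g bucket, so they come out as
adjacent vertices in the graph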
complexity:
populating the graph involves (1) iterating through all n valid words, and for
each one (a) adding one hash entry per letter, where (b) computing each key
requires iterating through the word; and then (2) iterating through the
(at most) m*n keys in the hash to build the graph.
so that is O(n*m*m + n*m) to create the graph
once the graph is created, the BFS operation to find the word ladder will
potentially visit all n nodes, but more likely a subset consisting of the
connected component of which the start word is a part.
so that is O(n)
thus the dominant cost is graph construction, or O(n*m*(m+1)) = O(n*m*m).
n (the number of valid words) will be much larger than m (the number of
letters per word): e.g., there were 586 3-letter words in the "dict" file,
and 4495 5-letter words. so the algorithm takes superlinear-ish time
with regard to the number of valid words, but not close to quadratic
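as a rough check with the numbers above: the 5-letter list gives
n*m*(m+1) = 4495*5*6 = 134,850 key operations, versus 586*3*4 = 7,032
for the 3-letter list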
"""
import queue


def load_words(file):
    with open(file) as f:
        return [line.strip() for line in f]

def make_key(word, index):
    """return word with the letter at index masked by an underscore"""
    x = list(word)
    x[index] = "_"
    return "".join(x)


def make_keys(word):
    """return all masked variants of word, e.g. _og, d_g, do_ for dog"""
    return [make_key(word, i) for i in range(len(word))]

def pop_hash(h, word):
    """add word to the bucket for each of its masked keys"""
    for key in make_keys(word):
        if key in h:
            h[key].add(word)
        else:
            h[key] = {word}

def add_edges(g, words):
    """add edges between all words in the set"""
    for w in words:
        x = words - {w}
        if w in g:
            g[w] = g[w].union(x)
        else:
            g[w] = x

def pop_graph(words):
    """build the adjacency dict: word -> set of words one letter away"""
    h = {}
    for w in words:
        pop_hash(h, w)
    g = {}
    for k in h:
        if len(h[k]) > 1:
            add_edges(g, h[k])
    return g

def find_ladder(graph, start, end):
    """BFS from start; return the ladder as a list of words, or [] if none"""
    if start not in graph:
        return []
    if start == end:
        return [start]
    q = queue.Queue()
    visited = {start: True}
    parent = {}
    done = False
    q.put(start)
    while not q.empty() and not done:
        word = q.get()
        for w in graph[word]:
            if w == end:
                parent[w] = word
                done = True
                break
            elif w not in visited:
                # mark on enqueue so no word is queued more than once
                visited[w] = True
                parent[w] = word
                q.put(w)
    if not done:
        return []
    # walk the parent links back from end, then reverse into start->end order
    retval = [end]
    w = end
    while w in parent and w != start:
        w = parent[w]
        retval.append(w)
    retval.reverse()
    return retval
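

# A minimal self-contained check (the word list below is hand-picked for
# illustration, not taken from words3.txt):
def demo():
    sample = ["fig", "fog", "dog", "dig", "cat", "cot", "cog"]
    g = pop_graph(sample)
    # both fig -> fog -> dog and fig -> dig -> dog are shortest ladders;
    # which one comes back depends on set iteration order
    print(find_ladder(g, "fig", "dog"))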

if __name__ == "__main__":
    words = load_words('words3.txt')
    graph = pop_graph(words)
    ladder = find_ladder(graph, 'fig', 'dog')
    print(ladder)