Created
August 1, 2012 22:43
-
-
Save kedarbellare/3231358 to your computer and use it in GitHub Desktop.
dijkstra implementations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import heapq | |
import csv | |
# no heapq implementation | |
def up_heapify(L, i): | |
while i: | |
pi = (i-1)/2 | |
if L[pi] > L[i]: | |
L[i], L[pi] = L[pi], L[i] | |
i = pi | |
else: | |
return | |
def down_heapify(L, i): | |
length = len(L) | |
while True: | |
li, ri = 2*i+1, 2*i+2 | |
if ri >= length: | |
if li >= length: return | |
elif L[i] > L[li]: | |
(L[i], L[li]) = (L[li], L[i]) | |
return | |
else: | |
si = li if L[li] < L[ri] else ri | |
if L[i] > L[si]: | |
(L[i], L[si]) = (L[si], L[i]) | |
i = si | |
else: return | |
def extract_min(L): | |
L[0], L[len(L)-1] = L[len(L)-1], L[0] | |
minval = L.pop() | |
down_heapify(L, 0) | |
return minval | |
def heap_add(L, x): | |
L.append(x) | |
up_heapify(L, len(L)-1) | |
def get_paths(parent, v): | |
paths = {v: [v]} | |
def add_paths_from(x): | |
if x not in paths: | |
px_path = add_paths_from(parent[x]) | |
paths[x] = px_path + [x] | |
return paths[x] | |
for x in parent: | |
if x not in paths: | |
add_paths_from(x) | |
return paths | |
def dijkstra_mod(G, v, dist_update, heap_pop, heap_push): | |
dist_so_far = {v: 0} | |
dist_heap = [(0, v)] | |
parent = {} | |
final_dist = {} | |
while len(dist_heap) > 0: | |
dw, w = heap_pop(dist_heap) | |
if w not in final_dist: | |
final_dist[w] = dw | |
for x in G[w]: | |
if x not in final_dist: | |
dx = dist_update(dw, G[w][x]) | |
if x not in dist_so_far or dist_so_far[x] > dx: | |
dist_so_far[x] = dx | |
heap_push(dist_heap, (dx, x)) | |
parent[x] = w | |
return final_dist, parent | |
def dijkstra(G, v, dist_update = lambda d1, d2: d1+d2): | |
return dijkstra_mod(G, v, dist_update, heapq.heappop, heapq.heappush) | |
def dijkstra_vanilla(G, v, dist_update = lambda d1, d2: d1+d2): | |
return dijkstra_mod(G, v, dist_update, extract_min, heap_add) | |
# D's Dijkstra implementation | |
def down_heapifyD(L, H, i): | |
llen, L = L[0], L[1] | |
while (True): | |
left = 2*i+1 | |
right = 2*i+2 | |
if (right >= llen): | |
if (left >= llen): | |
# leaf | |
return i | |
else: | |
# only one child | |
if L[i][0] > L[left][0]: | |
(L[i], L[left]) = (L[left], L[i]) | |
H[L[i][1]] = i | |
H[L[left][1]] = left | |
return left | |
else: | |
# if i has two children... | |
if min(L[left][0], L[right][0]) >= L[i][0]: return i | |
if L[left][0] < L[right][0]: | |
(L[i], L[left]) = (L[left], L[i]) | |
H[L[i][1]] = i | |
H[L[left][1]] = left | |
i = left | |
else: | |
# Swap into right child | |
(L[i], L[right]) = (L[right], L[i]) | |
H[L[i][1]] = i | |
H[L[right][1]] = right | |
i = right | |
def up_heapifyD(L, H, i): | |
L = L[1] | |
while i: | |
parentIndex = (i-1)/2 | |
if L[i][0] < L[parentIndex][0]: | |
L[i], L[parentIndex] = L[parentIndex], L[i] | |
H[L[i][1]] = i | |
H[L[parentIndex][1]] = parentIndex | |
i = parentIndex | |
else: | |
return i | |
return i | |
def heap_addD(L, H, node, value): | |
L[0] += 1 | |
L[1][L[0]-1] = (value, node) | |
H[node] = L[0]-1 | |
up_heapifyD(L, H, L[0]-1) | |
def heap_delD(L, H): | |
del H[L[1][0][1]] | |
if (L[0]>1): | |
L[0] -= 1 | |
L[1][0] = L[1][L[0]] | |
H[L[1][0][1]] = 0 | |
down_heapifyD(L, H, 0) | |
else: | |
L[0] -= 1 | |
def dijkstraD(G,v): | |
L = [0, len(G)*[None]] | |
H = {} | |
heap_addD(L, H, v, 0) | |
final_dist = {} | |
while len(final_dist) < len(G) and L[0]: | |
(dist, w) = L[1][0] | |
final_dist[w] = dist | |
heap_delD(L, H) | |
for x in G[w]: | |
if x not in final_dist: | |
new_dist = final_dist[w] + G[w][x] | |
if x not in H: | |
heap_addD(L, H, x, new_dist) | |
elif new_dist < L[1][H[x]][0]: | |
L[1][H[x]] = (new_dist, x) | |
down_heapifyD(L, H, H[x]) | |
return final_dist | |
############ | |
# | |
# Test | |
def make_link(G, node1, node2, w): | |
if node1 not in G: | |
G[node1] = {} | |
if node2 not in G[node1]: | |
(G[node1])[node2] = 0 | |
(G[node1])[node2] += w | |
if node2 not in G: | |
G[node2] = {} | |
if node1 not in G[node2]: | |
(G[node2])[node1] = 0 | |
(G[node2])[node1] += w | |
return G | |
def make_random_graph(size): | |
from random import randint | |
G = {} | |
nodes = range(size) | |
for a in range(2*size): | |
a = a % size | |
b = (a + randint(1, size - 1)) % size | |
w = randint(0, 20) | |
make_link(G, a, b, w) | |
while True: | |
conn, unconn = connected_nodes(G, 0) | |
if len(unconn) == 0: | |
break | |
make_link(G, conn.pop(), unconn.pop(), randint(0, 20)) | |
return G | |
def connected_nodes(G, root): | |
# just run a search | |
all_nodes = set(G.keys()) | |
open_list = [root] | |
visited_nodes = set([root]) | |
all_nodes.remove(root) | |
while len(open_list) > 0: | |
node = open_list.pop() | |
neighbors = G[node].keys() | |
for n in neighbors: | |
if n not in visited_nodes: | |
open_list.append(n) | |
visited_nodes.add(n) | |
all_nodes.remove(n) | |
return visited_nodes, all_nodes | |
def test(): | |
import random | |
# shortcuts | |
(a,b,c,d,e,f,g) = ('A', 'B', 'C', 'D', 'E', 'F', 'G') | |
triples = ((a,c,3),(c,b,10),(a,b,15),(d,b,9),(a,d,4),(d,f,7),(d,e,3), | |
(e,g,1),(e,f,5),(f,g,2),(b,f,1)) | |
G = {} | |
for (i,j,k) in triples: | |
make_link(G, i, j, k) | |
dist, predecessor = dijkstra(G, a) | |
assert dist[g] == 8 #(a -> d -> e -> g) | |
assert dist[b] == 11 #(a -> d -> e -> g -> f -> b) | |
print 'test passes' | |
def test_speed(): | |
import time | |
sizes = [100,300,600,1000,3000,6000,10000,30000] | |
trials = 10 | |
for size in sizes: | |
avg_time = 0.0 | |
avg_time2 = 0.0 | |
avg_timeD = 0.0 | |
for _ in xrange(trials): | |
G = make_random_graph(size) | |
start_time = time.time() | |
dist, parent = dijkstra(G, 0) | |
avg_time += (time.time() - start_time) | |
start_time = time.time() | |
dist, parent = dijkstra_vanilla(G, 0) | |
avg_time2 += (time.time() - start_time) | |
start_time = time.time() | |
dist = dijkstraD(G, 0) | |
avg_timeD += (time.time() - start_time) | |
avg_time /= trials | |
avg_time2 /= trials | |
avg_timeD /= trials | |
print 'time taken for %d size graph, without heapq/with heapq: %.3f, without heapq D/with heapq: %.3f' % (size, avg_time2/avg_time, avg_timeD/avg_time) | |
def read_marvel_graph(filename): | |
# Read an undirected graph in CSV format. Each line is an edge | |
tsv = csv.reader(open(filename), delimiter='\t') | |
G, characters = {}, {} | |
index = 0 | |
for (char, comic) in tsv: | |
if char not in characters: | |
characters[char] = index | |
index += 1 | |
make_link(G, char, comic, 1) | |
charG = {} | |
for char1 in characters: | |
for comic in G[char1]: | |
for char2 in G[comic]: | |
if characters[char1] < characters[char2]: | |
make_link(charG, char1, char2, 1) | |
hopcharG = {} | |
for char1 in charG: | |
for char2 in charG[char1]: | |
count = charG[char1][char2] | |
charG[char1][char2] = 1.0/count | |
if characters[char1] < characters[char2]: | |
make_link(hopcharG, char1, char2, 1) | |
return hopcharG, charG | |
def create_imdb_graph(fname, wtfname): | |
G = {} | |
actors = {} | |
movies = {} | |
movie_wts = {} | |
index = 0 | |
file = csv.reader(open(wtfname, 'rb'), delimiter='\t') | |
for row in file: | |
movie = '%s (%s)' % (row[0], row[1]) | |
movie_wts[movie] = float(row[2]) | |
file = csv.reader(open(fname, 'rb'), delimiter='\t') | |
for row in file: | |
actor = row[0] | |
movie = '%s (%s)' % (row[1], row[2]) | |
if actor not in actors: | |
actors[actor] = index | |
index += 1 | |
if movie not in movies: | |
movies[movie] = index | |
index += 1 | |
make_link(G, actors[actor], movies[movie], movie_wts[movie]) | |
return G, actors, movies | |
test() | |
test_speed() | |
hopcharG, charG = read_marvel_graph('marvel.tsv') | |
my_chars = ['SPIDER-MAN/PETER PAR', 'GREEN GOBLIN/NORMAN ', | |
'WOLVERINE/LOGAN ', 'PROFESSOR X/CHARLES ', 'CAPTAIN AMERICA'] | |
numpaths = 0 | |
for char1 in my_chars: | |
hopdist, hopparent = dijkstra(hopcharG, char1) | |
dist, parent = dijkstra(charG, char1) | |
paths = get_paths(parent, char1) | |
hoppaths = get_paths(hopparent, char1) | |
numcharpaths = 0 | |
for char2 in dist: | |
if len(hoppaths[char2]) != len(paths[char2]): | |
numpaths += 1 | |
numcharpaths += 1 | |
print 'for', char1, '#paths=', numcharpaths | |
print numpaths | |
imdbG, actors, movies = create_imdb_graph('imdb-1.tsv', 'imdb-weights.tsv') | |
testImdb = {(u'Ali, Tony', u'Allen, Woody'): 0.5657, | |
(u'Auberjonois, Rene', u'MacInnes, Angus'): 0.0814, | |
(u'Avery, Shondrella', u'Dorsey, Kimberly (I)'): 0.7837, | |
(u'Bollo, Lou', u'Jeremy, Ron'): 0.4763, | |
(u'Byrne, P.J.', u'Clarke, Larry'): 0.109, | |
(u'Couturier, Sandra-Jessica', u'Jean-Louis, Jimmy'): 0.3649, | |
(u'Crawford, Eve (I)', u'Cutler, Tom'): 0.2052, | |
(u'Flemyng, Jason', u'Newman, Laraine'): 0.139, | |
(u'French, Dawn', u'Smallwood, Tucker'): 0.2979, | |
(u'Gunton, Bob', u'Nagra, Joti'): 0.2136, | |
(u'Hoffman, Jake (I)', u'Shook, Carol'): 0.6073, | |
#(u'Kamiki, Ry\xfbnosuke', u'Thor, Cameron'): 0.3644, | |
(u'Roache, Linus', u'Dreyfuss, Richard'): 0.6731, | |
(u'Sanchez, Phillip (I)', u'Wiest, Dianne'): 0.5083, | |
(u'Sheppard, William Morgan', u'Crook, Mackenzie'): 0.0849, | |
(u'Stan, Sebastian', u'Malahide, Patrick'): 0.2857, | |
(u'Tessiero, Michael A.', u'Molen, Gerald R.'): 0.2056, | |
(u'Thomas, Ken (I)', u'Bell, Jamie (I)'): 0.3941, | |
(u'Thompson, Sophie (I)', u'Foley, Dave (I)'): 0.1095, | |
(u'Tzur, Mira', u'Heston, Charlton'): 0.3642} | |
answer = {(u'Boone Junior, Mark', u'Del Toro, Benicio'): None, | |
(u'Braine, Richard', u'Coogan, Will'): None, | |
(u'Byrne, Michael (I)', u'Quinn, Al (I)'): None, | |
(u'Cartwright, Veronica', u'Edelstein, Lisa'): None, | |
(u'Curry, Jon (II)', u'Wise, Ray (I)'): None, | |
(u'Di Benedetto, John', u'Hallgrey, Johnathan'): None, | |
(u'Hochendoner, Jeff', u'Cross, Kendall'): None, | |
(u'Izquierdo, Ty', u'Kimball, Donna'): None, | |
(u'Jace, Michael', u'Snell, Don'): None, | |
(u'James, Charity', u'Tuerpe, Paul'): None, | |
(u'Kay, Dominic Scott', u'Cathey, Reg E.'): None, | |
(u'McCabe, Richard', u'Washington, Denzel'): None, | |
(u'Reid, Kevin (I)', u'Affleck, Rab'): None, | |
(u'Reid, R.D.', u'Boston, David (IV)'): None, | |
(u'Restivo, Steve', u'Preston, Carrie (I)'): None, | |
(u'Rodriguez, Ramon (II)', u'Mulrooney, Kelsey'): None, | |
(u'Rooker, Michael (I)', u'Grady, Kevin (I)'): None, | |
(u'Ruscoe, Alan', u'Thornton, Cooper'): None, | |
(u'Sloan, Tina', u'Dever, James D.'): None, | |
(u'Wasserman, Jerry', u'Sizemore, Tom'): None} | |
for actor1, actor2 in testImdb: | |
a1, a2 = actors[actor1], actors[actor2] | |
dist, parent = dijkstra(imdbG, a1, dist_update = lambda d1, d2: max(d1, d2)) | |
assert dist[a2] == testImdb[(actor1, actor2)] | |
for actor1, actor2 in answer: | |
a1, a2 = actors[actor1], actors[actor2] | |
dist, parent = dijkstra(imdbG, a1, dist_update = lambda d1, d2: max(d1, d2)) | |
answer[(actor1, actor2)] = dist[a2] | |
print answer |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment