Skip to content

Instantly share code, notes, and snippets.

@denis-bz
Created July 17, 2023 12:53
Show Gist options
  • Save denis-bz/07fc76c19fb3b3b6f404023e93e90807 to your computer and use it in GitHub Desktop.
Save denis-bz/07fc76c19fb3b3b6f404023e93e90807 to your computer and use it in GitHub Desktop.
mst.py unionfind.py from 2011

Old mst.py with Kruskal's algorithm

mst.py is simple old (2011) code for MSTs using Kruskal's algorithm -- see the animation there.

Kleinberg-Tardos:

single-link k-clustering == MST k-1 most expensive edges

but min( max spacing ), Theorem 1, may be wrong or at least odd for k-clustering ?

See also
Prof. Wayne's teaching
http://nifty.stanford.edu/
https://github.com/topics/mst
https://github.com/topics/clustering
wiki Single-linkage_clustering -- long thin clusters, dendrograms

cheers
-- denis July 2023

#!/usr/bin/env python3
""" mst.py 2011 https://stats.stackexchange.com/questions/1475/visualization-software-for-clustering
see also
https://www.cs.princeton.edu/~wayne/kleinberg-tardos /pdf/04GreedyAlgorithmsII.pdf
single-link k-clustering == MST k-1 most expensive edges
Theorem max (min spacing)
https://github.com/topics/mst /clustering ?l=python
https://en.wikipedia.org/wiki/Kruskal's_algorithm animation
https://en.wikipedia.org/wiki/Single-linkage_clustering long thin clusters, dendrograms
"""
import numpy as np
from unionfind import Unionfind
#...............................................................................
def mst( edges, sorted=False ):
""" edges [ (dist, x, y) ... ] -> k edges in mst """
# Kruskal: Cormen p. 505
edges = np.asarray( edges )
dist, nodes = edges[:,0], edges[:,1:]
if not sorted:
edges = edges[ dist.argsort() ]
n = nodes.max() + 1
comp = Unionfind(n) # all singletons
mstedges = []
for d,x,y in edges:
if comp.diff( x, y ):
comp.union( x, y )
mstedges.append( (d, x, y) )
return mstedges
#...............................................................................
if __name__ == "__main__":
import sys
print( 80 * "▄" )
# to change these params, run this.py a=1 b=None 'c = expr' ... in sh or ipython --
for arg in sys.argv[1:]:
exec( arg )
dxy = np.array([
# 4 a b, 8 b c, 7 c d, 9 d e, 10 e f, 2 f g, 1 g h,
# 8 h a, 11 h b, 7 h i, 2 i c, 6 i g, 4 c f, 14 d f,
(4,1,2), (8,2,3), (7,3,4), (9,4,5), (10,5,6), (2,6,7), (1,7,8),
(8,8,1), (11,8,2), (7,8,9), (2,9,3), (6,9,7), (4,3,6), (14,4,6),
])
dist = dxy[:,0]
maxnode = dxy[:,1:].max()
print( "\n-- Graph: distance node node" )
print( dxy[ dist.argsort() ].T, "\n" )
mste = mst( dxy )
print( "\n-- MST edgeweight node node" )
for dxy in mste:
print( "%d %d %d" % dxy ) # 1 7 8 2 6 7 ...
__version__ = "2023-07-17 July" # https://gist.github.com/denis-bz
#!/usr/bin/env python3
class Unionfind:
""" diff, union, findset: 0 <= x, y < n """
# straight from Cormen Leiserson Rivest p. 448
def __init__( self, n ):
self.n = n
self.p = list( range( n ))
self.rank = n * [0]
def findset( self, x ):
if x != self.p[x]:
self.p[x] = self.findset( self.p[x] ) # lovely
return self.p[x]
def union( self, x, y ):
assert 0 <= x < self.n, "x %d not in 0:%d" % (x, self.n)
assert 0 <= y < self.n, "y %d not in 0:%d" % (y, self.n)
x = self.findset(x)
y = self.findset(y)
if self.rank[x] > self.rank[y]:
self.p[y] = x
else:
self.p[x] = y
if self.rank[x] == self.rank[y]:
self.rank[y] += 1
def diff( self, x, y ):
assert 0 <= x < self.n, "x %d not in 0:%d" % (x, self.n)
assert 0 <= y < self.n, "y %d not in 0:%d" % (y, self.n)
return self.findset(x) != self.findset(y)
#...............................................................................
if __name__ == "__main__":
uf = Unionfind( 5 )
uf.union( 0, 2 )
uf.union( 3, 4 )
uf.union( 2, 3 )
print("diff( 0, 1 ):", uf.diff( 0, 1 ))
print("diff( 0, 4 ):", uf.diff( 0, 4 ))
print("p:", uf.p)
print("rank:", uf.rank)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment