Created
March 13, 2016 14:07
-
-
Save andrewxhill/13de0618d31893cdc4c5 to your computer and use it in GitHub Desktop.
Minimum spanning tree in SQL... just because
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TYPE minimum_spanning_tree_internal AS ( | |
d NUMERIC[], | |
a INT[], | |
b INT[], | |
geoms GEOMETRY[], | |
ids TEXT[] | |
); | |
CREATE TYPE minimum_spanning_tree_unit AS ( | |
d NUMERIC, | |
a INT, | |
b INT | |
); | |
CREATE TYPE minimum_spanning_tree_container AS ( | |
t INT, | |
a INT, | |
b INT | |
); | |
CREATE TYPE minimum_spanning_tree_result AS ( | |
source INT, | |
target INT, | |
the_geom GEOMETRY, | |
cost NUMERIC | |
); | |
CREATE OR REPLACE FUNCTION minimum_spanning_tree_pre(data minimum_spanning_tree_internal, in_geom GEOMETRY, in_class TEXT) RETURNS minimum_spanning_tree_internal as $$ | |
DECLARE | |
d NUMERIC; | |
n NUMERIC; | |
BEGIN | |
n = array_length(data.ids, 1) + 1; | |
IF array_length(data.ids, 1) > 0 THEN | |
FOR i IN 1 .. array_upper(data.ids, 1) LOOP | |
data.d = array_append(data.d, ST_Distance(in_geom, data.geoms[i])::numeric); | |
data.a = array_append(data.a, n::int); | |
data.b = array_append(data.b, i::int); | |
END LOOP; | |
END IF; | |
data.ids = array_append(data.ids, in_class) ; | |
data.geoms = array_append(data.geoms, in_geom) ; | |
RETURN data; | |
END; | |
$$ language plpgsql IMMUTABLE; | |
CREATE AGGREGATE minimum_spanning_tree ( GEOMETRY, TEXT) ( | |
SFUNC = minimum_spanning_tree_pre, | |
STYPE = minimum_spanning_tree_internal | |
-- , FINALFUNC = minimum_spanning_tree_calc | |
); | |
CREATE OR REPLACE FUNCTION minimum_spanning_tree_calc(data minimum_spanning_tree_internal) RETURNS SETOF minimum_spanning_tree_result as $$ | |
DECLARE | |
result DECIMAL; | |
r minimum_spanning_tree_unit; | |
tmp_a minimum_spanning_tree_container; | |
tmp_b minimum_spanning_tree_container; | |
out minimum_spanning_tree_result; | |
a INT; | |
b INT; | |
-- our master tree index | |
tree_m INT[]; | |
tree_a INT[]; | |
tree_b INT[]; | |
-- our loose trees index | |
tree_u INT[]; | |
tree_ua INT[]; | |
tree_ub INT[]; | |
tree_ut INT[]; | |
curr_t INT = 1; --current working tree | |
target_t INT; | |
a_in_m INT; | |
b_in_m INT; | |
move_tree INT; | |
move_tree_a INT[]; | |
move_tree_b INT[]; | |
BEGIN | |
result := 0; | |
FOR r IN SELECT ii.d, ii.a::int, ii.b::int FROM (SELECT unnest(data.d) d, unnest(data.a) a, unnest(data.b) b) ii ORDER BY ii.d ASC LOOP | |
-- result = result+1; | |
IF array_length(tree_m, 1) IS NULL THEN | |
-- our starting condition | |
tree_m = ARRAY[r.a, r.b]; | |
tree_a = ARRAY[r.a]; | |
tree_b = ARRAY[r.b]; | |
ELSE | |
a_in_m = 0; | |
b_in_m = 0; | |
IF r.a = ANY(tree_m) THEN | |
a_in_m = 1; | |
END IF; | |
IF r.b = ANY(tree_m) THEN | |
b_in_m = 1; | |
END IF; | |
IF a_in_m+b_in_m != 2 THEN -- make sure we don't have a loop closer | |
IF array_length(tree_ua, 1) IS NULL THEN | |
tree_ua = ARRAY[r.a]; | |
tree_ub = ARRAY[r.b]; | |
tree_ut = ARRAY[curr_t]; | |
target_t = curr_t; | |
ELSE | |
-- find any case of a in one of the exiting trees | |
SELECT ii.t, ii.a, ii.b INTO tmp_a FROM (SELECT unnest(tree_ut) t, unnest(tree_ua) a,unnest(tree_ub) b) ii WHERE (ii.a = r.a AND ii.b!= r.b) OR (ii.a != r.b AND ii.b = r.a); | |
SELECT ii.t, ii.a, ii.b INTO tmp_b FROM (SELECT unnest(tree_ut) t, unnest(tree_ua) a,unnest(tree_ub) b) ii WHERE (ii.a != r.a AND ii.b= r.b) OR (ii.a = r.b AND ii.b != r.a); | |
target_t = -1; | |
IF tmp_a.t IS NOT null THEN | |
IF tmp_b.t IS null THEN | |
-- Add edge to the tree in tmp_a | |
tree_ua = array_append( tree_ua, r.a); | |
tree_ub = array_append( tree_ub, r.b); | |
tree_ut = array_append( tree_ut, tmp_a.t); | |
-- Keep track of where we put it | |
target_t = tmp_a.t; | |
ELSIF tmp_a.t != tmp_b.t THEN | |
-- Add the edge to tree in tmp_a | |
tree_ua = array_append( tree_ua, r.a); | |
tree_ub = array_append( tree_ub, r.b); | |
tree_ut = array_append( tree_ut, tmp_b.t); | |
-- Change the tree in tmp_b to be the tree in tmp_a | |
tree_ut = array_replace(tree_ut, tmp_a.t, tmp_b.t); | |
-- make sure we capture a needed move of the tree to master | |
IF b_in_m = 1 THEN | |
a_in_m = 1; | |
b_in_m = 0; | |
END IF; | |
target_t = tmp_a.t; | |
END IF; | |
ELSIF tmp_b.t IS NOT null THEN | |
-- Add edge to tree in tmp_b | |
tree_ua = array_append( tree_ua, r.a); | |
tree_ub = array_append( tree_ub, r.b); | |
tree_ut = array_append( tree_ut, tmp_b.t); | |
target_t = tmp_b.t; | |
ELSE | |
-- the edge is from a totally new tree | |
curr_t = curr_t+1; | |
tree_ua = array_append( tree_ua, r.a); | |
tree_ub = array_append( tree_ub, r.b); | |
tree_ut = array_append( tree_ut, curr_t); | |
target_t = curr_t; | |
END IF; | |
END IF; | |
IF a_in_m + b_in_m = 1 THEN -- we need to move a tree to master | |
move_tree = target_t; | |
SELECT array_agg(ii.a::int), array_agg(ii.b::int) INTO move_tree_a, move_tree_b FROM (SELECT unnest(tree_ua) a, unnest(tree_ub) b, unnest(tree_ut) t) ii WHERE ii.t = move_tree; | |
tree_m = tree_m || move_tree_a; | |
tree_m = tree_m || move_tree_b; | |
tree_a = tree_a || move_tree_a; | |
tree_b = tree_b || move_tree_b; | |
SELECT array_agg(ii.a::int), array_agg(ii.b::int), array_agg(ii.t::int) INTO tree_ua, tree_ub, tree_ut FROM (SELECT unnest(tree_ua) a, unnest(tree_ub) b, unnest(tree_ut) t) ii WHERE ii.t != move_tree; | |
END IF; | |
END IF; | |
END IF; | |
END LOOP; | |
FOR i IN 1 .. array_upper(tree_a, 1) LOOP | |
a = tree_a[i]; | |
b = tree_b[i]; | |
out.the_geom = ST_MakeLine(data.geoms[a], data.geoms[b]); | |
out.source = data.ids[a]; | |
out.target = data.ids[b]; | |
out.cost = ST_Distance(data.geoms[a], data.geoms[b]); | |
RETURN NEXT out; | |
END LOOP; | |
RETURN; | |
END; | |
$$ language plpgsql IMMUTABLE; | |
-- WITH a AS ( | |
-- SELECT (minimum_spanning_tree_calc( minimum_spanning_tree(the_geom , cartodb_id::text ORDER BY cartodb_id ASC) )).* from untitled_table_48 ) | |
-- SELECT *, ST_Transform(the_geom, 3857) the_geom_webmercator FROM a |
I bet there are good optimizations just in the basic algorithm design. There has to be a redundant loop or two. After that... it's probably time to write it in C to make it really zip.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hi @andrewxhill! this code really helped me, thanks! I have a question, how would you optimize it for a large set of points? (for example 1500 points), is it possible?