Created
January 27, 2022 00:58
-
-
Save munro/374b2538be634532a253a8d28d826070 to your computer and use it in GitHub Desktop.
BigQuery Connected Components algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Does a depth of 10 | |
CREATE OR REPLACE PROCEDURE `mydataset`.connected_components_by_edge( | |
table_name STRING, | |
left_column STRING, | |
right_column STRING, | |
output_temp_table_name STRING | |
) | |
OPTIONS( | |
description="Connected components algorithm, clusters nodes by unidirected edges by the smallest node ID." | |
) | |
BEGIN EXECUTE IMMEDIATE """ | |
CREATE OR REPLACE TEMP TABLE """ || output_temp_table_name || """ AS | |
WITH edges AS ( | |
SELECT | |
LEAST(""" || left_column || ", " || right_column || """) AS a, | |
GREATEST(""" || left_column || ", " || right_column || """) AS b | |
FROM """ || table_name || """ | |
), | |
e1 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM edges e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e2 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e1 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e3 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e2 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e4 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e3 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e5 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e4 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e6 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e5 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e7 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e6 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e8 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e7 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e9 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e8 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
), e10 AS ( | |
SELECT LEAST(e.a, follow.a) AS a, e.b FROM e9 e JOIN edges follow ON e.a = follow.b WHERE e.a != e.b AND e.a != LEAST(e.a, follow.a) | |
) | |
SELECT | |
a AS cluster_id, | |
b AS node_id, | |
degrees | |
FROM ( | |
SELECT a, a AS b, 0 AS degrees FROM edges | |
UNION ALL SELECT b AS a, b, 0 AS degrees FROM edges | |
UNION ALL SELECT *, 1 AS degrees FROM edges e WHERE a != b | |
UNION ALL SELECT *, 2 AS degrees FROM e1 | |
UNION ALL SELECT *, 3 AS degrees FROM e2 | |
UNION ALL SELECT *, 4 AS degrees FROM e3 | |
UNION ALL SELECT *, 5 AS degrees FROM e4 | |
UNION ALL SELECT *, 6 AS degrees FROM e5 | |
UNION ALL SELECT *, 7 AS degrees FROM e6 | |
UNION ALL SELECT *, 8 AS degrees FROM e7 | |
UNION ALL SELECT *, 9 AS degrees FROM e8 | |
UNION ALL SELECT *, 10 AS degrees FROM e9 | |
UNION ALL SELECT *, 11 AS degrees FROM e10 | |
) | |
WHERE TRUE | |
QUALIFY ROW_NUMBER() OVER (PARTITION BY b ORDER BY a, degrees) = 1 | |
"""; | |
END; | |
-- Example | |
CREATE TEMP TABLE edges AS SELECT * FROM UNNEST(CAST( | |
ARRAY[(0, 1), (1, 2), (2, 3), (2, 4), (4, 5), (0, 5), (6, 7), (7, 8)] | |
AS ARRAY<STRUCT<a INT64, b INT64>> | |
)) e; | |
CALL `mydataset`.connected_components_by_edge("edges", "a", "b", "clusters"); | |
SELECT * FROM clusters ORDER BY cluster_id, node_id; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment