Last active
February 26, 2024 16:03
-
-
Save dankrause/76baa0ad73ff19fd39e861600c56a15d to your computer and use it in GitHub Desktop.
An example of creating a recursive postgresql query to generate data about parent-child relationships within a single table.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE test | |
( | |
id INTEGER, | |
parent INTEGER | |
); | |
INSERT INTO test (id, parent) VALUES | |
(1, NULL), | |
(2, 1), | |
(3, 1), | |
(4, NULL), | |
(5, 4), | |
(6, 4), | |
(7, 2), | |
(8, 3), | |
(9, 5), | |
(10, 5), | |
(11, 8), | |
(12, 8); | |
-- 1 4 | |
-- / \ / \ | |
-- 2 3 5 6 | |
-- / / / \ | |
-- 7 8 9 10 | |
-- / \ | |
-- 11 12 | |
WITH RECURSIVE parents AS | |
( | |
SELECT | |
id AS id, | |
0 AS number_of_ancestors, | |
ARRAY [id] AS ancestry, | |
NULL :: INTEGER AS parent, | |
id AS start_of_ancestry | |
FROM test | |
WHERE | |
parent IS NULL | |
UNION | |
SELECT | |
child.id AS id, | |
p.number_of_ancestors + 1 AS ancestry_size, | |
array_append(p.ancestry, child.id) AS ancestry, | |
child.parent AS parent, | |
coalesce(p.start_of_ancestry, child.parent) AS start_of_ancestry | |
FROM test child | |
INNER JOIN parents p ON p.id = child.parent | |
) | |
SELECT | |
id, | |
number_of_ancestors, | |
ancestry, | |
parent, | |
start_of_ancestry | |
FROM parents; | |
-- id | number_of_ancestors | ancestry | parent | start_of_ancestry | |
-- 1 | 0 | {1} | NULL | 1 | |
-- 2 | 1 | {1,2} | 1 | 1 | |
-- 3 | 1 | {1,3} | 1 | 1 | |
-- 4 | 0 | {4} | NULL | 4 | |
-- 5 | 1 | {4,5} | 4 | 4 | |
-- 6 | 1 | {4,6} | 4 | 4 | |
-- 7 | 2 | {1,2,7} | 2 | 1 | |
-- 8 | 2 | {1,3,8} | 3 | 1 | |
-- 9 | 2 | {4,5,9} | 5 | 4 | |
-- 10 | 2 | {4,5,10} | 5 | 4 | |
-- 11 | 3 | {1,3,8,11} | 8 | 1 | |
-- 12 | 3 | {1,3,8,12} | 8 | 1 |
Nice hack with the coalesce and the array append. I'm used to using list_agg but this is better for some use cases.
Thanks. Would it be beneficial to create an index for the parent
field or a combined index for both id
and parent
?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Very helpful, thank you!
For varchar id type, I have to cast array to varchar(255)[] to make it work:
ARRAY [id]::varchar(255)[] AS ancestry,
array_append(p.ancestry, child.id)::varchar(255)[] AS ancestry,
If no cast, there will be error notice:
recursive query "parents" column 2 has type character varying(20)[] in non-recursive term but type character varying[] overall
Cast the output of the non-recursive term to the correct type.