We want to have 1 row by id, but not just aggregating all values with simple array_agg
,
but removing null and empty fields, removing duplicates,
and doing it for array field and 2D array field too.
Last active
February 28, 2024 22:16
-
-
Save bityob/181656ae4c3c7f22155523dcfef579ed to your computer and use it in GitHub Desktop.
Postgres - Aggregate multiple rows to 1 row with removing duplicates, null, empty fields on text, array and 2D array types
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--drop table data | |
--truncate table data | |
create temp table data (id int, s text, u text, start_time timestamp); | |
insert into data values | |
(1, null, null, null), | |
(1, '{123, 456}', '{{111, 222, 333}, {444, 555, 666}}', '2024-02-20'), | |
(1, '{456, 789}', '{{111, 222, 333}}', '2024-02-21'), | |
(1, '{123}', '{{777, 888, 999}}', '2024-02-22'), | |
(1, '', '{{111, 222, 333}, {444, 555, 666}}', '2024-02-23'), | |
(1, '{}', '{}', '2024-02-24'), | |
(1, '{}', '{}', '2024-05-31'), | |
(2, null, null, null), | |
(2, '{9123, 9456}', '{{9111, 9222, 9333}, {9444, 9555, 9666}}', '2024-03-20'), | |
(2, '{9456, 9789}', '{{9111, 9222, 9333}}', '2024-03-21'), | |
(2, '{9123}', '{{9777, 9888, 9999}}', '2024-03-22'), | |
(2, '', '{{9111, 9222, 9333}, {9444, 9555, 9666}}', '2024-03-23'), | |
(2, '{}', '{}', '2024-03-29'), | |
(2, '{}', '{}', '2024-03-24'); | |
select * from data; | |
-- Source: https://stackoverflow.com/a/8142998 | |
CREATE OR REPLACE FUNCTION unnest_2d_1d(anyarray) | |
RETURNS SETOF anyarray | |
LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT AS | |
$func$ | |
SELECT array_agg($1[d1][d2]) | |
FROM generate_subscripts($1,1) d1 | |
, generate_subscripts($1,2) d2 | |
GROUP BY d1 | |
ORDER BY d1 | |
$func$; | |
with step1 as ( | |
select id, | |
-- Ignore field if is null or empty | |
array_agg(s) filter (where s is not null and s<>'' and s<>'{}') as s, | |
array_agg(u) filter (where u is not null and u<>'' and u<>'{}') as u, | |
max(start_time) as start_time | |
from data | |
group by id | |
), | |
step2 as ( | |
select id, array_agg(s) as s | |
from | |
( | |
-- Remove duplications | |
select distinct id, unnest(unnest(s)::varchar[]) as s | |
from step1 | |
) as step2_internal | |
group by id | |
), | |
step3 as ( | |
select id, array_agg(u) as u | |
from | |
( | |
-- Remove duplications | |
select distinct id, unnest_2d_1d(unnest(u::varchar[])::varchar[]) as u | |
from step1 | |
) as step3_internal | |
group by id | |
) | |
select step1.id, | |
step1.start_time, | |
--step1.s as orig_s, | |
step2.s, | |
--step1.u as orig_u, | |
step3.u | |
from step1 | |
join step2 on step1.id=step2.id | |
join step3 on step1.id=step3.id; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment