Expanding JSON arrays to rows with SQL on RedShift: All queries to reproduce the technique from my blog post https://torsten.io/stdout/expanding-json-arrays-to-rows
-- Related blog post to this Gist:
-- https://torsten.io/stdout/expanding-json-arrays-to-rows
-- Run these commands in an interactive RedShift session:

CREATE TEMP TABLE clusters AS (
    SELECT 1 AS id, '[1, 2]' AS node_sizes UNION ALL
    SELECT 2 AS id, '[5, 1, 3]' AS node_sizes UNION ALL
    SELECT 3 AS id, '[2]' AS node_sizes
);

-- This is just a temp table to leave no trace after the session.
-- In practice I am using a `CREATE VIEW` and more rows.
CREATE TEMP TABLE seq_0_to_100 AS (
    SELECT 0 AS i UNION ALL
    SELECT 1 UNION ALL
    SELECT 2 UNION ALL
    SELECT 3 UNION ALL
    SELECT 4 UNION ALL
    SELECT 5
    -- I am stopping here; you could easily generate this as a VIEW with
    -- 100+ real rows (see the sketch after this statement)...
);
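
-- A sketch of the VIEW variant mentioned above (illustrative only, not the
-- actual production view): cross join two digit sets instead of writing one
-- UNION ALL per row. This yields i = 0..99, enough for arrays of up to 100
-- elements; use it in place of seq_0_to_100 in the queries below.
CREATE VIEW seq_0_to_99 AS
SELECT ones.i + 10 * tens.i AS i
FROM (SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL
      SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS ones,
     (SELECT 0 AS i UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL
      SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS tens;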

-- To see the intermediate relation:
SELECT id, JSON_EXTRACT_ARRAY_ELEMENT_TEXT(node_sizes, seq.i) AS size
FROM clusters, seq_0_to_100 AS seq
WHERE seq.i < JSON_ARRAY_LENGTH(node_sizes)
ORDER BY 1, 2;
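
-- With the three sample clusters above, this should return one row per array
-- element (note that size comes back as text from JSON_EXTRACT_ARRAY_ELEMENT_TEXT):
--
--  id | size
-- ----+------
--   1 | 1
--   1 | 2
--   2 | 1
--   2 | 3
--   2 | 5
--   3 | 2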

-- To fetch the maximum size:
WITH exploded_array AS (
    SELECT id, JSON_EXTRACT_ARRAY_ELEMENT_TEXT(node_sizes, seq.i) AS size
    FROM clusters, seq_0_to_100 AS seq
    WHERE seq.i < JSON_ARRAY_LENGTH(node_sizes)
)
SELECT MAX(size::INT)  -- the extracted value is text, so cast before comparing numerically
FROM exploded_array;
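
-- With the sample data this should return 5. Without the cast, MAX would
-- compare the sizes as strings, which happens to give the right answer for
-- these single-digit values but would not for values like '10'.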
Thanks for this amazing hack 👍
Btw, you can create the sequence table with less manual work:
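For example (a sketch of one common approach, not necessarily the snippet that accompanied this comment), ROW_NUMBER() over any table that already has enough rows can generate the indices:

-- any_big_table is a placeholder for any table with at least 101 rows.
CREATE TEMP TABLE seq_0_to_100 AS (
    SELECT (ROW_NUMBER() OVER () - 1)::INT AS i
    FROM any_big_table
    LIMIT 101
);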