Skip to content

Instantly share code, notes, and snippets.

@clrcrl
Last active February 4, 2023 04:59
Show Gist options
  • Save clrcrl/58062fbbe2af91cceab0e9187a510101 to your computer and use it in GitHub Desktop.
Save clrcrl/58062fbbe2af91cceab0e9187a510101 to your computer and use it in GitHub Desktop.
Macros to unnest arrays
-- this is a model - put it in the models/ directory
WITH nested_table AS (
SELECT 1 AS id, 'a' AS tags
UNION
SELECT 2 AS id, 'a, b' AS tags
UNION
SELECT 3 AS id, 'c , d' AS tags
UNION
SELECT 4 AS id, 'a,b,e' AS tags
UNION
SELECT 5 AS id, NULL AS tags
UNION
SELECT 6 AS id, '' AS tags
UNION
SELECT 7 AS id, ' ' AS tags
)
, unnested_table AS (
{{unnest_delimited_list("nested_table", "id", "tags")}}
)
SELECT
*
FROM unnested_table
-- this is a model - put it in the models/ directory
WITH nested_table AS (
SELECT 1 AS id, '["a"]' AS tags
UNION
SELECT 2 AS id, '["a"," b"]' AS tags
UNION
SELECT 3 AS id, '["c ","d"]' AS tags
UNION
SELECT 4 AS id, '["a", "b", "e"]' AS tags
UNION
SELECT 5 AS id, NULL AS tags
UNION
SELECT 6 AS id, '' AS tags
UNION
SELECT 7 AS id, '[]' AS tags
)
, unnested_table AS (
{{unnest_json_array("nested_table", "id", "tags")}}
)
SELECT
*
FROM unnested_table
-- this is a macro - put it in the macros/ directory
{% macro unnest_delimited_list(nested_table, id_column, nested_column, delimiter = ",") -%}
WITH nested_rows AS (
SELECT
{{id_column}} AS id
, NULLIF(TRIM({{nested_column}}),'') AS nested_value
, REGEXP_COUNT(NULLIF(TRIM({{nested_column}}),''), '{{delimiter}}') AS length
FROM {{nested_table}}
WHERE NULLIF(TRIM({{nested_column}}),'') IS NOT NULL
)
, sequence AS (
SELECT
i
FROM public.numbers -- assuming you have a number sequence stored which starts at 0
WHERE < (SELECT MAX(length) FROM nested_rows)
)
SELECT
nested_rows.id AS {{id_column}}
, TRIM(SPLIT_PART(nested_rows.nested_value, '{{delimiter}}', sequence.i + 1)) AS {{nested_column}}_unnested
FROM nested_rows
INNER JOIN sequence ON sequence.i <= nested_rows.length
{%- endmacro %}
-- this is a macro - put it in the macros/ directory
{% macro unnest_json_array(nested_table, id_column, nested_column) -%}
WITH nested_rows AS (
SELECT
{{id_column}} AS id
, {{nested_column}} AS nested_value
, JSON_ARRAY_LENGTH({{nested_column}}, true) AS length
FROM {{nested_table}}
WHERE JSON_ARRAY_LENGTH({{nested_column}}, true) > 0
)
, sequence AS (
SELECT
i
FROM public.numbers -- assuming you have a number sequence stored which starts at 0
WHERE i < (SELECT MAX(length) FROM nested_rows)
)
SELECT
nested_rows.id AS {{id_column}}
, TRIM(JSON_EXTRACT_ARRAY_ELEMENT_TEXT(nested_rows.nested_value, sequence.i)) AS {{nested_column}}_unnested
FROM nested_rows
INNER JOIN sequence ON sequence.i < nested_rows.length
{%- endmacro %}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment