Skip to content

Instantly share code, notes, and snippets.

@juhaelee
Created May 5, 2022 18:58
Show Gist options
  • Save juhaelee/fcae1717d35b71c12e18ce908f252b9a to your computer and use it in GitHub Desktop.
Save juhaelee/fcae1717d35b71c12e18ce908f252b9a to your computer and use it in GitHub Desktop.
with fake_data as (
/* Generate 10 fake users and many audiences */
select
uniform(1, 10, random()) as user_id,
'a-' || substr(uuid_string(), 1, 5) as audience_id
from table(generator(rowcount => 500))
order by 1
),
max_n as (
// for each user_id, bucket into n groups, here we are doing 5 but can change this to any number.
// we use trunc to get rid of the decimal and keep only the integer portion of the number.
// ex trunc (1.2) => 1
select
user_id,
audience_id,
trunc(row_number() over(partition by user_id order by audience_id) / 5) as user_id_audience_group
from fake_data
),
agg as (
/* we aggergate by user id and the audience group and combine them into one array, this means only a max of 5 audiences per row */
select
user_id,
user_id_audience_group,
array_agg(audience_id) as audience_agg
from max_n
group by 1, 2
),
agg_str as (
/* finally we convert to a string a split by a comma */
select user_id,
user_id_audience_group,
array_to_string(audience_agg, ',') as audience_list
from agg
)
select * from agg_str
pivot(max(audience_list) for user_id_audience_group in (1,2,3,4,5,6,7,8,9,10))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment