Skip to content

Instantly share code, notes, and snippets.

@bearloga
Last active June 25, 2024 21:26
Show Gist options
  • Save bearloga/b0ca0b3ebd7ca427beeb68fe920a66e7 to your computer and use it in GitHub Desktop.
Save bearloga/b0ca0b3ebd7ca427beeb68fe920a66e7 to your computer and use it in GitHub Desktop.
T368326: Data modeling multiple experiments
-- Usage: spark3-sql -f demo_data.sql -d db=bearloga
DROP TABLE IF EXISTS ${db}.demo_data_v1;
CREATE TABLE IF NOT EXISTS ${db}.demo_data_v1 (
user_id int,
action string,
experiments array<map<string, string>>
)
LOCATION '/tmp/${db}/demo_data_v1';
INSERT INTO ${db}.demo_data_v1 (user_id, action, experiments) VALUES
(1, 'click', ARRAY(MAP('name', 'test1', 'group', 'a'), MAP('name', 'test2', 'group', 'b'))),
(2, 'click', ARRAY()),
(3, 'click', ARRAY(MAP('name', 'test2', 'group', 'a'))),
(4, 'click', ARRAY(MAP('name', 'test1', 'group', 'b'), MAP('name', 'test2', 'group', 'a')))
;
DROP TABLE IF EXISTS ${db}.demo_data_v2;
CREATE TABLE IF NOT EXISTS ${db}.demo_data_v2 (
user_id int,
action string,
experiments struct<enrolled:array<string>, assigned:map<string, string>>
)
LOCATION '/tmp/${db}/demo_data_v2';
INSERT INTO ${db}.demo_data_v2 (user_id, action, experiments) VALUES
(1, 'click', NAMED_STRUCT('enrolled', ARRAY('test1', 'test2'), 'assigned', MAP('test1', 'a', 'test2', 'b'))),
(2, 'click', NAMED_STRUCT('enrolled', ARRAY(), 'assigned', NULL)),
(3, 'click', NAMED_STRUCT('enrolled', ARRAY('test2'), 'assigned', MAP('test2', 'a'))),
(4, 'click', NAMED_STRUCT('enrolled', ARRAY('test1', 'test2'), 'assigned', MAP('test1', 'b', 'test2', 'a')))
;
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment