Created
October 21, 2021 19:15
-
-
Save dertajora/993188e15de9af254637c202271d317b to your computer and use it in GitHub Desktop.
Query to Generate Fake Dataset in Google BigQuery
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- https://medium.com/google-cloud/yet-another-way-to-generate-fake-datasets-in-bigquery-93ee87c1008f | |
CREATE TEMP FUNCTION entity(seed INT64) | |
RETURNS STRUCT<transaction_id String, user_id Integer, transaction_date String, ledger_type String, points Integer, partner String, remarks String, country String> | |
LANGUAGE js | |
AS """ | |
var t = {}; | |
var items = [100,150,200,250,300,350,400] | |
var countries = ["sg", "id"] | |
var remark_list = ["completed article","completed survey","redeemed points"] | |
var types = ["redeemed", "earned"] | |
function makeid(length) { | |
var result = ''; | |
var characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'; | |
var charactersLength = characters.length; | |
for ( var i = 0; i < length; i++ ) { | |
result += characters.charAt(Math.floor(Math.random() * | |
charactersLength)); | |
} | |
return result; | |
} | |
function randomDate(start, end) { | |
return new Date(start.getTime() + Math.random() * (end.getTime() - start.getTime())); | |
} | |
t.transaction_id = makeid(64) | |
t.user_id = Math.floor(Math.random() * 1000); | |
t.transaction_date = moment(randomDate(new Date(2021, 0, 1), new Date())).format('YYYY-MM-DD HH:MM:SS'); | |
t.ledger_type = types[Math.floor(Math.random()*types.length)]; | |
t.points = items[Math.floor(Math.random()*items.length)] | |
t.partner = "internal" | |
t.remarks = remark_list[Math.floor(Math.random()*remark_list.length)] | |
t.country = countries[Math.floor(Math.random()*countries.length)] | |
return t; | |
""" | |
OPTIONS ( | |
library=["gs://<bucket-name>/moment.js"] | |
); | |
-- SELECT entity().* | |
CREATE OR REPLACE TABLE dataset.ledger AS ( | |
SELECT row_num as id, entity(row_num).* | |
FROM dataset.row_seq | |
where row_num <= 150000000 | |
); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment