Skip to content

Instantly share code, notes, and snippets.

@ducchetrongminh
Last active December 12, 2023 05:52
Show Gist options
  • Save ducchetrongminh/841bdc963c725db973b45e19b0852d03 to your computer and use it in GitHub Desktop.
Save ducchetrongminh/841bdc963c725db973b45e19b0852d03 to your computer and use it in GitHub Desktop.
Create BigQuery sample data
-- Dataset that holds the sampled copies of the public tables.
-- IF NOT EXISTS keeps the statement re-runnable: without it, a second run
-- fails because the dataset already exists, even though every table
-- statement in this script uses CREATE OR REPLACE.
-- NOTE(review): location 'US' presumably matches the location of the
-- bigquery-public-data source datasets -- confirm before changing it.
CREATE SCHEMA IF NOT EXISTS `vit-lam-data.public_data_sample`
OPTIONS (
  location = 'US'
);
-- Hacker News sample: (re)creates the table from a 5 percent TABLESAMPLE of
-- the public source table. LIKE takes the table definition from the source,
-- so SELECT * is intentional: the query result has to line up with it.
-- TABLESAMPLE is a random sample, so each run may store different rows.
CREATE OR REPLACE TABLE `vit-lam-data.public_data_sample.hacker_news`
  LIKE `bigquery-public-data.hacker_news.full`
  OPTIONS (
    description = '''Sample data of Hacker News.
Original table: bigquery-public-data.hacker_news.full
Doc: https://console.cloud.google.com/marketplace/product/y-combinator/hacker-news
'''
  )
AS
SELECT *
FROM `bigquery-public-data.hacker_news.full`
  TABLESAMPLE SYSTEM (5 PERCENT);
-- Iowa Liquor Sales sample: (re)creates the table from a 7 percent
-- TABLESAMPLE of the public source table. LIKE takes the table definition
-- from the source, so SELECT * is intentional.
-- TABLESAMPLE is a random sample, so each run may store different rows.
CREATE OR REPLACE TABLE `vit-lam-data.public_data_sample.iowa_liquor_sales`
LIKE `bigquery-public-data.iowa_liquor_sales.sales`
OPTIONS (
  description = '''Sample data of Iowa Liquor Sales.
Original table: bigquery-public-data.iowa_liquor_sales.sales
Doc: https://console.cloud.google.com/marketplace/product/iowa-department-of-commerce/iowa-liquor-sales
'''
)
AS (
  SELECT *
  FROM `bigquery-public-data.iowa_liquor_sales.sales`
    TABLESAMPLE SYSTEM (7 PERCENT)
  -- Deliberately no ORDER BY: a BigQuery table has no guaranteed stored row
  -- order, so sorting the result of a CREATE TABLE ... AS SELECT only adds a
  -- global sort (cost and a possible "resources exceeded" failure) without
  -- changing which rows are stored. Readers that need the rows ordered by
  -- date, invoice_and_item_number should ORDER BY in their own query.
);
-- ListenBrainz sample: (re)creates the table from a 2 percent TABLESAMPLE of
-- the public source table. LIKE takes the table definition from the source,
-- so SELECT * is intentional: the query result has to line up with it.
-- TABLESAMPLE is a random sample, so each run may store different rows.
CREATE OR REPLACE TABLE `vit-lam-data.public_data_sample.listenbrainz`
  LIKE `bigquery-public-data.listenbrainz.listen`
  OPTIONS (
    description = '''Sample data of Listenbrainz.
Original table: bigquery-public-data.listenbrainz.listen
Doc: https://console.cloud.google.com/marketplace/product/metabrainz/listenbrainz
'''
  )
AS
SELECT *
FROM `bigquery-public-data.listenbrainz.listen`
  TABLESAMPLE SYSTEM (2 PERCENT);
-- PyPI file downloads sample: (re)creates the table from a 0.0007 percent
-- TABLESAMPLE of the public source table, restricted to downloads whose
-- timestamp falls in calendar year 2022. LIKE takes the table definition
-- from the source, so SELECT * is intentional.
-- TABLESAMPLE is a random sample, so each run may store different rows.
CREATE OR REPLACE TABLE `vit-lam-data.public_data_sample.pypi_file_downloads`
LIKE `bigquery-public-data.pypi.file_downloads`
OPTIONS (
  description = '''Sample data of PyPI File Downloads.
Original table: bigquery-public-data.pypi.file_downloads
Doc: https://console.cloud.google.com/marketplace/product/gcp-public-data-pypi/pypi
'''
)
AS (
  SELECT *
  FROM `bigquery-public-data.pypi.file_downloads`
    TABLESAMPLE SYSTEM (0.0007 PERCENT)
  -- Half-open range [2022-01-01, 2023-01-01): BETWEEN is inclusive at both
  -- ends and would also keep rows stamped exactly 2023-01-01 00:00:00.
  -- Explicit TIMESTAMP literals avoid relying on implicit string coercion.
  WHERE timestamp >= TIMESTAMP '2022-01-01'
    AND timestamp < TIMESTAMP '2023-01-01'
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment