Skip to content

Instantly share code, notes, and snippets.

@RajenDharmendra
Last active May 26, 2020 05:28
Show Gist options
  • Save RajenDharmendra/2697c39bdd388e097a12ec19708dc707 to your computer and use it in GitHub Desktop.
Save RajenDharmendra/2697c39bdd388e097a12ec19708dc707 to your computer and use it in GitHub Desktop.
CREATE TABLE tpcdsch.customer_address
(
`ca_address_sk` Int8,
`ca_address_id` String,
`ca_street_number` String,
`ca_street_name` String,
`ca_street_type` String,
`ca_suite_number` String,
`ca_city` String,
`ca_county` String,
`ca_state` String,
`ca_zip` String,
`ca_country` String,
`ca_gmt_offset` Float,
`ca_location_type` String
)
ENGINE = MergeTree()
PARTITION BY ca_address_id
ORDER BY ca_street_number
SETTINGS index_granularity = 8192
---------------------------------------------------------------------------------
CREATE TABLE tpcdsch.customer_demographics
(
`cd_demo_sk` Int8,
`cd_gender` String,
`cd_marital_status` String,
`cd_education_status` String,
`cd_purchase_estimate` Int8,
`cd_credit_rating` String,
`cd_dep_count` Int8,
`cd_dep_employed_count` Int8,
`cd_dep_college_count` Int8
)
ENGINE = Log
__________________________________________________________________________
CREATE TABLE tpcdsch.customer_demographics
(
`cd_demo_sk` Int8,
`cd_gender` String,
`cd_marital_status` String,
`cd_education_status` String,
`cd_purchase_estimate` Int8,
`cd_credit_rating` String,
`cd_dep_count` Int8,
`cd_dep_employed_count` Int8,
`cd_dep_college_count` Int8
)
ENGINE = MergeTree()
PARTITION BY cd_credit_rating
ORDER BY cd_demo_sk
SETTINGS index_granularity = 8192
________________________________________________________
SELECT
countdistinct(cd_demo_sk),
countdistinct(cd_purchase_estimate),
countdistinct(cd_education_status),
countdistinct(cd_credit_rating)
FROM tpcdsch.customer_demographics
┌─uniqExact(cd_demo_sk)─┬─uniqExact(cd_purchase_estimate)─┬─uniqExact(cd_education_status)─┬─uniqExact(cd_credit_rating)─┐
│ 256 │ 20 │ 7 │ 4 │
└───────────────────────┴─────────────────────────────────┴────────────────────────────────┴─────────────────────────────┘
________________________________________________________________________________________________________________________
CREATE TABLE tpcdsch.warehouse
(
`w_warehouse_sk` int,
`w_warehouse_id` String,
`w_warehouse_name` String,
`w_warehouse_sq_ft` int,
`w_street_number` String,
`w_street_name` String,
`w_street_type` String,
`w_suite_number` String,
`w_city` String,
`w_county` String,
`w_state` String,
`w_zip` String,
`w_country` String,
`w_gmt_offset` Float
)
ENGINE = Log
________________________________________________________________________________________
CREATE TABLE tpcdsch.warehouse
(
`w_warehouse_sk` Int8,
`w_warehouse_id` String,
`w_warehouse_name` String,
`w_warehouse_sq_ft` int,
`w_street_number` String,
`w_street_name` String,
`w_street_type` String,
`w_suite_number` String,
`w_city` String,
`w_county` String,
`w_state` String,
`w_zip` String,
`w_country` String,
`w_gmt_offset` Float
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY tuple()
SETTINGS index_granularity = 8192
______________________________________________________________________________________
aavin-dev :) select count(*) from warehouse;
SELECT count(*)
FROM warehouse
┌─count()─┐
│ 10 │
└─────────┘
1 rows in set. Elapsed: 0.002 sec.
______________________________________________________________________________________________________
CREATE TABLE tpcdsch.ship_mode
(
`sm_ship_mode_sk` Int8,
`sm_ship_mode_id` String,
`sm_type` String,
`sm_code` String,
`sm_carrier` String,
`sm_contract` String
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY tuple()
SETTINGS index_granularity = 8192
_______________________________________________________________________________________
aavin-dev :) select count(*) from ship_mode;
SELECT count(*)
FROM ship_mode
┌─count()─┐
│ 40 │
└─────────┘
1 rows in set. Elapsed: 0.002 sec.
________________________________________________________________________________________
CREATE TABLE tpcdsch.time_dim
(
`t_time_sk` Int8,
`t_time_id` String,
`t_time` Int8,
`t_hour` Int8,
`t_minute` Int8,
`t_second` Int8,
`t_am_pm` String,
`t_shift` String,
`t_sub_shift` String,
`t_meal_time` String
)
ENGINE = ENGINE = MergeTree()
PARTITION BY (t_time_sk)
ORDER BY (t_time_id)
SETTINGS index_granularity = 8192
___________________________________________________________________________
SELECT
countdistinct(t_time_id),
countdistinct(t_time_sk)
FROM tpcdsch.time_dim
┌─uniqExact(t_time_id)─┬─uniqExact(t_time_sk)─┐
│ 86400 │ 256 │
└──────────────────────┴──────────────────────┘
___________________________________________________________________________________
CREATE TABLE tpcdsch.reason
(
`r_reason_sk` Int8,
`r_reason_id` String,
`r_reason_desc` String
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY tuple()
SETTINGS index_granularity = 8192
__________________________________________________
CREATE TABLE tpcdsch.income_band
(
`ib_income_band_sk` Int8,
`ib_lower_bound` Int8,
`ib_upper_bound` Int8
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY tuple()
SETTINGS index_granularity = 8192
Ok.
0 rows in set. Elapsed: 0.033 sec.
_____________________________________________________
CREATE TABLE tpcdsch.catalog_sales (
`cs_sold_date_sk` Int8,
`cs_sold_time_sk` Int8,
`cs_ship_date_sk` Int8,
`cs_bill_customer_sk` Int8,
`cs_bill_cdemo_sk` Int8,
`cs_bill_hdemo_sk` Int8,
`cs_bill_addr_sk` Int8,
`cs_ship_customer_sk` Int8,
`cs_ship_cdemo_sk` Int8,
`cs_ship_hdemo_sk` Int8,
`cs_ship_addr_sk` Int8,
`cs_call_center_sk` Int8,
`cs_catalog_page_sk` Int8,
`cs_ship_mode_sk` Int8,
`cs_warehouse_sk` Int8,
`cs_item_sk` Int8,
`cs_promo_sk` Int8,
`cs_order_number` Int8,
`cs_quantity` Int8,
`cs_wholesale_cost` Float,
`cs_list_price` Float,
`cs_sales_price` Float,
`cs_ext_discount_amt` Float,
`cs_ext_sales_price` Float,
`cs_ext_wholesale_cost` Float,
`cs_ext_list_price` Float,
`cs_ext_tax` Float,
`cs_coupon_amt` Float,
`cs_ext_ship_cost` Float,
`cs_net_paid` Float,
`cs_net_paid_inc_tax` Float,
`cs_net_paid_inc_ship` Float,
`cs_net_paid_inc_ship_tax` Float,
`cs_net_profit` Float)
ENGINE = MergeTree() PARTITION BY (cs_sold_date_sk) ORDER BY (cs_sold_time_sk) SETTINGS index_granularity = 8192
____________________________________________________________________________________________________________________
CREATE TABLE tpcdsch.call_center
(
`cc_call_center_sk` Int8,
`cc_call_center_id` String,
`cc_rec_start_date` Date,
`cc_rec_end_date` Date,
`cc_closed_date_sk` Int8,
`cc_open_date_sk` Int8,
`cc_name` String,
`cc_class` String,
`cc_employees` Int8,
`cc_sq_ft` Int8,
`cc_hours` String,
`cc_manager` String,
`cc_mkt_id` Int8,
`cc_mkt_class` String,
`cc_mkt_desc` String,
`cc_market_manager` String,
`cc_division` Int8,
`cc_division_name` String,
`cc_company` Int8,
`cc_company_name` String,
`cc_street_number` String,
`cc_street_name` String,
`cc_street_type` String,
`cc_suite_number` String,
`cc_city` String,
`cc_county` String,
`cc_state` String,
`cc_zip` String,
`cc_country` String,
`cc_gmt_offset` Float,
`cc_tax_percentage` Float
)
ENGINE = MergeTree() PARTITION BY toYYYYMM(cc_rec_start_date) ORDER BY cc_call_center_sk SETTINGS index_granularity = 8192
_____________________________________________________________________________________________________________________________
CREATE TABLE tpcdsch.date_dim
(
d_date_sk Int8,
d_date_id String,
d_date Int8,
d_month_seq Int8,
d_week_seq Int8,
d_quarter_seg Int8,
d_year Int8,
d_dow Int8,
d_moy Int8,
d_dom Int8,
d_qoy Int8,
d_fy_year Int8,
d_fy_quarter_seq Int8,
d_fy_week_seq Int8,
d_day_name String,
d_quarter_name String,
d_holiday String,
d_weekend String,
d_following_holiday String,
d_first_dom Int8,
d_last_dom Int8,
d_same_day_1y Int8,
d_same_day_1q Int8,
d_current_day String,
d_current_week String,
d_current_month String,
d_current_quarter String,
d_current_year String
)
ENGINE = MergeTree()
PARTITION BY (d_date_sk)
ORDER BY (d_date_id)
SETTINGS index_granularity = 8192;
________________________________________________________________________________________________________
CREATE TABLE tpcdsch.household_demographics
(
`hd_demo_sk` Int8,
`hd_income_band_sk` Int8,
`hd_buy_potential` String,
`hd_dep_count` Int8,
`hd_vehicle_count` Int8
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY tuple()
SETTINGS index_granularity = 8192;
_______________________________________________________________________________________________________
CREATE TABLE tpcdsch.item
(
`i_item_sk` Int8,
`i_item_id` String,
`i_rec_start_date` Date,
`i_rec_end_date` Date,
`i_item_desc` String,
`i_current_price` Float,
`i_wholesale_cost` Float,
`i_brand_id` Int8,
`i_brand` String,
`i_class_id` Int8,
`i_class` String,
`i_category_id` Int8,
`i_category` String,
`i_manufact_id` Int8,
`i_manufact` String,
`i_size` String,
`i_formulation` String,
`i_color` String,
`i_units` String,
`i_container` String,
`i_manager_id` Int8,
`i_product_name` String
)
ENGINE = MergeTree()
PARTITION BY (i_item_sk)
ORDER BY (i_item_id)
SETTINGS index_granularity = 8192;
__________________________________________________________________________________________
CREATE TABLE tpcdsch.store (
s_store_sk Int8 ,
s_store_id String,
s_rec_start_date date,
s_rec_end_date date,
s_closed_date_sk Int8,
s_store_name String,
s_number_employees Int8,
s_floor_space Int8,
s_hours String,
s_manager String,
s_market_id Int8,
s_geography_class String,
s_market_desc String,
s_market_manager String,
s_division_id Int8,
s_division_name String,
s_company_id Int8,
s_company_name String,
s_street_number String,
s_street_name String,
s_street_type String,
s_suite_number String,
s_city String,
s_county String,
s_state String,
s_zip String,
s_country String,
s_gmt_offset Float,
s_tax_percentage Float
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY tuple()
SETTINGS index_granularity = 8192;
___________________________________________________________________________________________
CREATE TABLE tpcdsch.customer
(
`c_customer_sk` Int8,
`c_customer_id` String,
`c_current_cdemo_sk` Int8,
`c_current_hdemo_sk` Int8,
`c_current_addr_sk` Int8,
`c_first_shipto_date_sk` Int8,
`c_first_sales_date_sk` Int8,
`c_salutation` String,
`c_first_name` String,
`c_last_name` String,
`c_preferred_cust_flag` String,
`c_birth_day` Int8,
`c_birth_month` Int8,
`c_birth_year` Int8,
`c_birth_country` String,
`c_login` String,
`c_email_address` String,
`c_last_review_date` Int8
)
ENGINE = MergeTree()
PARTITION BY c_birth_country
ORDER BY c_first_sales_date_sk
SETTINGS index_granularity = 8192;
_______________________________________________________________________________________________
CREATE TABLE tpcdsch.store_sales (
ss_sold_date_sk Int8,
ss_sold_time_sk Int8,
ss_item_sk Int8 ,
ss_customer_sk Int8,
ss_cdemo_sk Int8,
ss_hdemo_sk Int8,
ss_addr_sk Int8,
ss_store_sk Int8,
ss_promo_sk Int8,
ss_ticket_number Int8 ,
ss_quantity Int8,
ss_wholesale_cost Float,
ss_list_price Float,
ss_sales_price Float,
ss_ext_discount_amt Float,
ss_ext_sales_price Float,
ss_ext_wholesale_cost Float,
ss_ext_list_price Float,
ss_ext_tax Float,
ss_coupon_amt Float,
ss_net_paid Float,
ss_net_paid_inc_tax Float,
ss_net_profit Float
)
ENGINE = MergeTree()
PARTITION BY (ss_store_sk)
ORDER BY (ss_sold_date_sk)
SETTINGS index_granularity = 8192;
____________________________________________________________________________________
CREATE TABLE tpcdsch.web_sales (
ws_sold_date_sk Int8,
ws_sold_time_sk Int8,
ws_ship_date_sk Int8,
ws_item_sk Int8 ,
ws_bill_customer_sk Int8,
ws_bill_cdemo_sk Int8,
ws_bill_hdemo_sk Int8,
ws_bill_addr_sk Int8,
ws_ship_customer_sk Int8,
ws_ship_cdemo_sk Int8,
ws_ship_hdemo_sk Int8,
ws_ship_addr_sk Int8,
ws_web_page_sk Int8,
ws_web_site_sk Int8,
ws_ship_mode_sk Int8,
ws_warehouse_sk Int8,
ws_promo_sk Int8,
ws_order_number Int8 ,
ws_quantity Int8,
ws_wholesale_cost Float,
ws_list_price Float,
ws_sales_price Float,
ws_ext_discount_amt Float,
ws_ext_sales_price Float,
ws_ext_wholesale_cost Float,
ws_ext_list_price Float,
ws_ext_tax Float,
ws_coupon_amt Float,
ws_ext_ship_cost Float,
ws_net_paid Float,
ws_net_paid_inc_tax Float,
ws_net_paid_inc_ship Float,
ws_net_paid_inc_ship_tax Float,
ws_net_profit Float
)
ENGINE = MergeTree()
PARTITION BY (ws_warehouse_sk)
ORDER BY (ws_sold_date_sk)
SETTINGS index_granularity = 8192 ;
__________________________________________________________________________________________
CREATE TABLE tpcdsch.catalog_sales_flat
ENGINE = MergeTree
PARTITION BY cs_sold_date_sk
ORDER BY cs_sold_time_sk AS
SELECT
cs.cs_sold_date_sk AS cs_sold_date_sk,
cs.cs_sold_time_sk AS cs_sold_time_sk,
cs.cs_ship_date_sk AS cs_ship_date_sk,
cs.cs_bill_customer_sk AS cs_bill_customer_sk,
cs.cs_bill_cdemo_sk AS cs_bill_cdemo_sk,
cs.cs_bill_hdemo_sk AS cs_bill_hdemo_sk,
cs.cs_bill_addr_sk AS cs_bill_addr_sk,
cs.cs_ship_customer_sk AS cs_ship_customer_sk,
cs.cs_ship_cdemo_sk AS cs_ship_cdemo_sk,
cs.cs_ship_hdemo_sk AS cs_ship_hdemo_sk,
cs.cs_ship_addr_sk AS cs_ship_addr_sk,
cs.cs_call_center_sk AS cs_call_center_sk,
cs.cs_catalog_page_sk AS cs_catalog_page_sk,
cs.cs_ship_mode_sk AS cs_ship_mode_sk,
cs.cs_warehouse_sk AS cs_warehouse_sk,
cs.cs_item_sk AS cs_item_sk,
cs.cs_promo_sk AS cs_promo_sk,
cs.cs_order_number AS cs_order_number,
cs.cs_quantity AS cs_quantity,
cs.cs_wholesale_cost AS cs_wholesale_cost,
cs.cs_list_price AS cs_list_price,
cs.cs_sales_price AS cs_sales_price,
cs.cs_ext_discount_amt AS cs_ext_discount_amt,
cs.cs_ext_sales_price AS cs_ext_sales_price,
cs.cs_ext_wholesale_cost AS cs_ext_wholesale_cost,
cs.cs_ext_list_price AS cs_ext_list_price,
cs.cs_ext_tax AS cs_ext_tax,
cs.cs_coupon_amt AS cs_coupon_amt,
cs.cs_ext_ship_cost AS cs_ext_ship_cost,
cs.cs_net_paid AS cs_net_paid,
cs.cs_net_paid_inc_tax AS cs_net_paid_inc_tax,
cs.cs_net_paid_inc_ship AS cs_net_paid_inc_ship,
cs.cs_net_paid_inc_ship_tax AS cs_net_paid_inc_ship_tax,
cs.cs_net_profit AS cs_net_profit,
da.d_date_sk AS d_date_sk,
da.d_date_id AS d_date_id,
da.d_date AS d_date,
da.d_month_seq AS d_month_seq,
da.d_week_seq AS d_week_seq,
da.d_quarter_seg AS d_quarter_seg,
da.d_year AS d_year,
da.d_dow AS d_dow,
da.d_moy AS d_moy,
da.d_dom AS d_dom,
da.d_qoy AS d_qoy,
da.d_fy_year AS d_fy_year,
da.d_fy_quarter_seq AS d_fy_quarter_seq,
da.d_fy_week_seq AS d_fy_week_seq,
da.d_day_name AS d_day_name,
da.d_quarter_name AS d_quarter_name,
da.d_holiday AS d_holiday,
da.d_weekend AS d_weekend,
da.d_following_holiday AS d_following_holiday,
da.d_first_dom AS d_first_dom,
da.d_last_dom AS d_last_dom,
da.d_same_day_1y AS d_same_day_1y,
da.d_same_day_1q AS d_same_day_1q,
da.d_current_day AS d_current_day,
da.d_current_week AS d_current_week,
da.d_current_month AS d_current_month,
da.d_current_quarter AS d_current_quarter,
da.d_current_year AS d_current_year,
hd.hd_demo_sk AS hd_demo_sk,
hd.hd_income_band_sk AS hd_income_band_sk,
hd.hd_buy_potential AS hd_buy_potential,
hd.hd_dep_count AS hd_dep_count,
hd.hd_vehicle_count AS hd_vehicle_count,
it.i_item_sk AS i_item_sk,
it.i_item_id AS i_item_id,
it.i_rec_start_date AS i_rec_start_date,
it.i_rec_end_date AS i_rec_end_date,
it.i_item_desc AS i_item_desc,
it.i_current_price AS i_current_price,
it.i_wholesale_cost AS i_wholesale_cost,
it.i_brand_id AS i_brand_id,
it.i_brand AS i_brand,
it.i_class_id AS i_class_id,
it.i_class AS i_class,
it.i_category_id AS i_category_id,
it.i_category AS i_category,
it.i_manufact_id AS i_manufact_id,
it.i_manufact AS i_manufact,
it.i_size AS i_size,
it.i_formulation AS i_formulation,
it.i_color AS i_color,
it.i_units AS i_units,
it.i_container AS i_container,
it.i_manager_id AS i_manager_id,
it.i_product_name AS i_product_name,
cu.c_customer_sk AS c_customer_sk,
cu.c_customer_id AS c_customer_id,
cu.c_current_cdemo_sk AS c_current_cdemo_sk,
cu.c_current_hdemo_sk AS c_current_hdemo_sk,
cu.c_current_addr_sk AS c_current_addr_sk,
cu.c_first_shipto_date_sk AS c_first_shipto_date_sk,
cu.c_first_sales_date_sk AS c_first_sales_date_sk,
cu.c_salutation AS c_salutation,
cu.c_first_name AS c_first_name,
cu.c_last_name AS c_last_name,
cu.c_preferred_cust_flag AS c_preferred_cust_flag,
cu.c_birth_day AS c_birth_day,
cu.c_birth_month AS c_birth_month,
cu.c_birth_year AS c_birth_year,
cu.c_birth_country AS c_birth_country,
cu.c_login AS c_login,
cu.c_email_address AS c_email_address,
cu.c_last_review_date AS c_last_review_date,
cc.cc_call_center_sk AS cc_call_center_sk,
cc.cc_call_center_id AS cc_call_center_id,
cc.cc_rec_start_date AS cc_rec_start_date,
cc.cc_rec_end_date AS cc_rec_end_date,
cc.cc_closed_date_sk AS cc_closed_date_sk,
cc.cc_open_date_sk AS cc_open_date_sk,
cc.cc_name AS cc_name,
cc.cc_class AS cc_class,
cc.cc_employees AS cc_employees,
cc.cc_sq_ft AS cc_sq_ft,
cc.cc_hours AS cc_hours,
cc.cc_manager AS cc_manager,
cc.cc_mkt_id AS cc_mkt_id,
cc.cc_mkt_class AS cc_mkt_class,
cc.cc_mkt_desc AS cc_mkt_desc,
cc.cc_market_manager AS cc_market_manager,
cc.cc_division AS cc_division,
cc.cc_division_name AS cc_division_name,
cc.cc_company AS cc_company,
cc.cc_company_name AS cc_company_name,
cc.cc_street_number AS cc_street_number,
cc.cc_street_name AS cc_street_name,
cc.cc_street_type AS cc_street_type,
cc.cc_suite_number AS cc_suite_number,
cc.cc_city AS cc_city,
cc.cc_county AS cc_county,
cc.cc_state AS cc_state,
cc.cc_zip AS cc_zip,
cc.cc_country AS cc_country,
cc.cc_gmt_offset AS cc_gmt_offset,
cc.cc_tax_percentage AS cc_tax_percentage
FROM tpcdsch.catalog_sales AS cs
INNER JOIN tpcdsch.date_dim AS da ON cs.cs_sold_date_sk = da.d_date
INNER JOIN tpcdsch.household_demographics AS hd ON cs.cs_bill_hdemo_sk = hd.hd_demo_sk
INNER JOIN tpcdsch.item AS it ON cs.cs_item_sk = it.i_item_sk
INNER JOIN tpcdsch.customer AS cu ON cs.cs_ship_customer_sk = cu.c_customer_sk
INNER JOIN tpcdsch.call_center AS cc ON cs.cs_call_center_sk = cc.cc_call_center_sk
*****************************************************************************************************
@RajenDharmendra
Copy link
Author

Note:- For Dimension tables there is not much to do with partition so i used partition by tuple()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment