Skip to content

Instantly share code, notes, and snippets.

@ericsahit
Last active August 29, 2015 14:08
Show Gist options
  • Save ericsahit/26769e6beb4981fe85b6 to your computer and use it in GitHub Desktop.
Save ericsahit/26769e6beb4981fe85b6 to your computer and use it in GitHub Desktop.
Shark-TPCDS-Query-3-8-10
set mapred.reduce.tasks=500;
drop table result;
create table result as select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM (SELECT 'web' as channel, 'ws_bill_customer_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price
FROM web_sales JOIN item ON (web_sales.ws_item_sk=item.i_item_sk) JOIN date_dim on (web_sales.ws_sold_date_sk=date_dim.d_date_sk)
WHERE ws_bill_customer_sk IS NULL
) foo
GROUP BY channel, col_name, d_year, d_qoy, i_category
ORDER BY channel, col_name, d_year, d_qoy, i_category
limit 100;
set mapred.reduce.tasks=500;
drop table result;
create table result as select
count(distinct WS.ws_order_number) as order_count
,sum(ws_ext_ship_cost) as total_shipping_cost
,sum(ws_net_profit) as total_net_profit
from web_site WES
JOIN web_sales WS on (WS.ws_web_site_sk = WES.web_site_sk and WES.web_company_name = 'pri')
LEFT OUTER JOIN (select ws_order_number
from (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2. ws_warehouse_sk wh2
from web_sales ws1 JOIN web_sales ws2
on (ws1.ws_order_number = ws2. ws_order_number) where
ws1.ws_warehouse_sk <> ws2. ws_warehouse_sk) t1) t2
on (WS.ws_order_number=t2. ws_order_number)
LEFT OUTER JOIN (select wr_order_number
from web_returns WR JOIN (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
from web_sales ws1 JOIN web_sales ws2
on (ws1.ws_order_number = ws2.ws_order_number)
where ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk ) ws_wh
on (WR.wr_order_number = ws_wh.ws_order_number)) t3
on (WS.ws_order_number=t3.wr_order_number)
JOIN date_dim DD on (WS.ws_ship_date_sk = DD.d_date_sk and DD.d_date between '2000-5-01' and '2000-7-01')
JOIN customer_address CA on (WS.ws_ship_addr_sk = CA.ca_address_sk and CA.ca_state = '0')
order by order_count
limit 100;
set mapred.reduce.tasks=500;
drop table result;
create table result as select ca_zip, ca_city, sum(ws_sales_price)
from item IT
JOIN web_sales WS on (WS.ws_item_sk = IT.i_item_sk)
JOIN customer C on (WS.ws_bill_customer_sk =C. c_customer_sk)
JOIN customer_address CA on (C.c_current_addr_sk = CA.ca_address_sk)
JOIN date_dim DD on (WS.ws_sold_date_sk = DD.d_date_sk and DD.d_qoy = 2 and DD.d_year = 2000)
left outer join (select i_item_id from item where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)) t1 on (IT.i_item_id = t1.i_item_id)
where
substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792')
group by ca_zip, ca_city
order by ca_zip, ca_city
limit 100;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment