Last active
April 25, 2016 00:26
-
-
Save fran0x/fdc11868666f0a30c11e to your computer and use it in GitHub Desktop.
Assignment from https://www.coursera.org/learn/bigdata-analytics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load the "orders" table from Hive into a DataFrame | |
orders_df=sqlCtx.sql("select * from orders") | |
orders_df.printSchema() | |
# 1) calculate number of orders in SUSPECTED_FRAUD status | |
sqlCtx.select("select count(order_id) from orders where order_status='SUSPECTED_FRAUD'").show(5) | |
# load the "order_items" table from Hive into a DataFrame | |
order_items_df=sqlCtx.sql("select * from order_items") | |
order_items_df.printSchema() | |
# 2) calculate the 3rd highest order amount from order_items | |
sqlCtx.select(""" | |
select order_item_order_id, sum(order_item_subtotal) as total | |
from order_items | |
group by order_item_order_id | |
order by total desc | |
""").show(5) | |
# 3) calculate average product price ("order_item_product_price") for products that belong to "COMPLETE" orders | |
sql_ctx.sql(""" | |
select avg(oi.order_item_product_price) | |
from order_items oi | |
inner join orders o on oi.order_item_order_id=o.order_id | |
where 1=1 and o.order_status='COMPLETE' | |
""").cache().show(5) | |
# 4) calculate maximum amount a single customer ordered considering only orders in "COMPLETE" status | |
sql_ctx.sql(""" | |
select o.order_customer_id,sum(oi.order_item_subtotal) as total | |
from order_items oi | |
inner join orders o on oi.order_item_order_id=o.order_id | |
where 1=1 and o.order_status='COMPLETE' | |
group by o.order_customer_id | |
order by total desc | |
""").show(5) | |
# 5) calculate largest across all total amounts on orders which are not complete | |
sql_ctx.sql(""" | |
select o.order_id,sum(oi.order_item_subtotal) as total | |
from order_items oi | |
inner join orders o on oi.order_item_order_id=o.order_id | |
where 1=1 and o.order_status!='COMPLETE' | |
group by o.order_id | |
order by total desc | |
""").show(5) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment