Skip to content

Instantly share code, notes, and snippets.

@elliottcordo
Created September 17, 2014 14:00
Show Gist options
  • Save elliottcordo/ad34345ec9c1599561d3 to your computer and use it in GitHub Desktop.
basic hive update strategy with dynamic partitioning
/* sales.csv
pizza,10.50,1,20140901
golf balls,4.44,1,20140901
hair gel,5,1,20140902
cream puffs,1.24,1,20140908
*/
/* sales2.csv
apples,4,1,20140908
frogs,3,1,20140908
*/
-- Staging landing zone: each day's raw sales file is dropped here
-- (comma-delimited, no header) before being merged into the fact table.
-- `if not exists` makes the script safe to re-run.
create external table if not exists stg_sales (
    item_name   string,
    sales_amt   float,  -- NOTE(review): float for money risks rounding; decimal(10,2) would be safer
    sales_count int,
    date_id     int     -- yyyymmdd as an integer; becomes the partition key downstream
)
row format delimited
fields terminated by ','
location '/staging/stg_sales';
-- End-user fact table, partitioned by date_id so each day's load lands in
-- its own partition directory under /warehouse/sales. `if not exists`
-- makes the script safe to re-run.
create external table if not exists sales (
    item_name   string,
    sales_amt   float,
    sales_count int
)
partitioned by (date_id int)  -- partition column lives outside the column list
row format delimited
fields terminated by ','
location '/warehouse/sales';
-- Load staged rows into the fact table. With a dynamic partition spec,
-- `insert overwrite` replaces only the partitions that appear in the
-- incoming data, leaving all other partitions untouched — this is the
-- "update strategy" of the gist.
-- Fix: the original `set` statement lacked a terminating `;`, which fails
-- when the file is executed as a script (hive -f). Also enable dynamic
-- partitioning explicitly — required alongside nonstrict mode on older Hive.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;  -- allow a fully dynamic partition spec
insert overwrite table sales partition(date_id)
select item_name, sales_amt, sales_count, date_id from stg_sales;
-- Archive the processed files so the next load sees only new data.
-- Fixes: Hive's embedded HDFS shell command is `dfs`, not `fs`; the target
-- `/archive/stg_sale` looked like a typo for `/archive/stg_sales`
-- (TODO confirm intended archive path); added the terminating `;`.
dfs -mv /staging/stg_sales/* /archive/stg_sales;
-- Simulate the next day's load: stage a new file, then re-run the same merge.
-- Partitions 20140901/20140902 from the first load are preserved; only the
-- partitions present in sales2.csv (20140908) are overwritten.
--hadoop fs -copyFromLocal sales2.csv /staging/stg_sales
-- Fix: both statements below were missing terminating `;`, which fails
-- under hive -f; also enable dynamic partitioning explicitly.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table sales partition(date_id)
select item_name, sales_amt, sales_count, date_id from stg_sales;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment