Skip to content

Instantly share code, notes, and snippets.

@elliottcordo
Created September 17, 2014 14:00
Show Gist options
  • Save elliottcordo/ad34345ec9c1599561d3 to your computer and use it in GitHub Desktop.
basic hive update strategy with dynamic partitioning
/* sales.csv
pizza,10.50,1,20140901
golf balls,4.44,1,20140901
hair gel,5,1,20140902
cream puffs,1.24,1,20140908
*/
/* sales2.csv
apples,4,1,20140908
frogs,3,1,20140908
*/
-- Staging landing zone: each day's raw sales file is dropped here
-- (comma-delimited, no header) before being merged into the fact table.
-- `if not exists` makes the script safe to re-run.
create external table if not exists stg_sales (
    item_name   string,
    sales_amt   float,  -- NOTE(review): float for money risks rounding; decimal(10,2) would be safer
    sales_count int,
    date_id     int     -- yyyymmdd as an integer; becomes the partition key downstream
)
row format delimited
fields terminated by ','
location '/staging/stg_sales';
-- End-user fact table, partitioned by date_id so each day's load lands in
-- its own partition directory under /warehouse/sales. `if not exists`
-- makes the script safe to re-run.
create external table if not exists sales (
    item_name   string,
    sales_amt   float,
    sales_count int
)
partitioned by (date_id int)  -- partition column lives outside the column list
row format delimited
fields terminated by ','
location '/warehouse/sales';
-- Load staged rows into the fact table. With a dynamic partition spec,
-- `insert overwrite` replaces only the partitions that appear in the
-- incoming data, leaving all other partitions untouched — this is the
-- "update strategy" of the gist.
-- Fix: the original `set` statement lacked a terminating `;`, which fails
-- when the file is executed as a script (hive -f). Also enable dynamic
-- partitioning explicitly — required alongside nonstrict mode on older Hive.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;  -- allow a fully dynamic partition spec
insert overwrite table sales partition(date_id)
select item_name, sales_amt, sales_count, date_id from stg_sales;
-- Archive the processed files so the next load sees only new data.
-- Fixes: Hive's embedded HDFS shell command is `dfs`, not `fs`; the target
-- `/archive/stg_sale` looked like a typo for `/archive/stg_sales`
-- (TODO confirm intended archive path); added the terminating `;`.
dfs -mv /staging/stg_sales/* /archive/stg_sales;
-- Simulate the next day's load: stage a new file, then re-run the same merge.
-- Partitions 20140901/20140902 from the first load are preserved; only the
-- partitions present in sales2.csv (20140908) are overwritten.
--hadoop fs -copyFromLocal sales2.csv /staging/stg_sales
-- Fix: both statements below were missing terminating `;`, which fails
-- under hive -f; also enable dynamic partitioning explicitly.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table sales partition(date_id)
select item_name, sales_amt, sales_count, date_id from stg_sales;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment