alaiacano · December 14, 2015 23:09
diff --git a/build_report.sh b/build_report.sh
 # Build the daily report: yesterday's per-user action counts from Hive plus
 # today's counts parsed from the scribe log, combined into combined_data.csv
 # and handed to generate_report.R.
 # Reads YESTERDAY_DATE and TODAY_DATE. Every step aborts the run on failure so
 # the report is never generated from missing or partial inputs.

 # fail fast if a date is unset or empty, instead of querying "dt=" and
 # producing files named raw_data_.csv
 : "${YESTERDAY_DATE:?YESTERDAY_DATE must be set}"
 : "${TODAY_DATE:?TODAY_DATE must be set}"

 # get rid of old results ("./" keeps a file name starting with "-" from being
 # read as an rm option)
 rm -f ./*.csv \
   || { printf 'build_report: could not remove old csv files\n' >&2; exit 1; }

 # run a hive query to get some action counts for each user.
 # NOTE(review): the date is interpolated into the query unquoted; if dt is a
 # string partition (e.g. 2015-12-13) it probably needs SQL quotes -- confirm.
 hive -e "SELECT user, count(*) as actions FROM post_table WHERE dt=${YESTERDAY_DATE} GROUP BY user" > archive_data.csv \
   || { printf 'build_report: hive query failed\n' >&2; exit 1; }

 # grab today's data from scribe server. The remote pattern is quoted so the
 # local shell never tries to expand it; it is passed to scp unchanged.
 # NOTE(review): the destination is a single file, so this presumably expects
 # exactly one *_current file on the server -- confirm.
 scp "scribe:/var/log/scribe/post_table/*_current" "raw_data_${TODAY_DATE}.csv" \
   || { printf 'build_report: scp from scribe failed\n' >&2; exit 1; }

 # parse today's raw data; per the original note the output is meant to be in
 # the same format (groupby/count) as the archive data
 python26 parse_scribe_data.py "raw_data_${TODAY_DATE}.csv" > "clean_data_${TODAY_DATE}.csv" \
   || { printf 'build_report: parse_scribe_data.py failed\n' >&2; exit 1; }

 # combine the results: today's rows first, then the archive rows
 cat "clean_data_${TODAY_DATE}.csv" archive_data.csv > combined_data.csv \
   || { printf 'build_report: could not write combined_data.csv\n' >&2; exit 1; }

 # generate the report, which loads combined_data.csv
 R CMD BATCH generate_report.R \
   || { printf 'build_report: R report generation failed\n' >&2; exit 1; }
	# Build the daily report: yesterday's per-user action counts from Hive plus
	# today's counts parsed from the scribe log, combined into combined_data.csv
	# and handed to generate_report.R.
	# Reads YESTERDAY_DATE and TODAY_DATE. Every step aborts the run on failure so
	# the report is never generated from missing or partial inputs.

	# fail fast if a date is unset or empty, instead of querying "dt=" and
	# producing files named raw_data_.csv
	: "${YESTERDAY_DATE:?YESTERDAY_DATE must be set}"
	: "${TODAY_DATE:?TODAY_DATE must be set}"

	# get rid of old results ("./" keeps a file name starting with "-" from being
	# read as an rm option)
	rm -f ./*.csv \
		|| { printf 'build_report: could not remove old csv files\n' >&2; exit 1; }

	# run a hive query to get some action counts for each user.
	# NOTE(review): the date is interpolated into the query unquoted; if dt is a
	# string partition (e.g. 2015-12-13) it probably needs SQL quotes -- confirm.
	hive -e "SELECT user, count(*) as actions FROM post_table WHERE dt=${YESTERDAY_DATE} GROUP BY user" > archive_data.csv \
		|| { printf 'build_report: hive query failed\n' >&2; exit 1; }

	# grab today's data from scribe server. The remote pattern is quoted so the
	# local shell never tries to expand it; it is passed to scp unchanged.
	# NOTE(review): the destination is a single file, so this presumably expects
	# exactly one *_current file on the server -- confirm.
	scp "scribe:/var/log/scribe/post_table/*_current" "raw_data_${TODAY_DATE}.csv" \
		|| { printf 'build_report: scp from scribe failed\n' >&2; exit 1; }

	# parse today's raw data; per the original note the output is meant to be in
	# the same format (groupby/count) as the archive data
	python26 parse_scribe_data.py "raw_data_${TODAY_DATE}.csv" > "clean_data_${TODAY_DATE}.csv" \
		|| { printf 'build_report: parse_scribe_data.py failed\n' >&2; exit 1; }

	# combine the results: today's rows first, then the archive rows
	cat "clean_data_${TODAY_DATE}.csv" archive_data.csv > combined_data.csv \
		|| { printf 'build_report: could not write combined_data.csv\n' >&2; exit 1; }

	# generate the report, which loads combined_data.csv
	R CMD BATCH generate_report.R \
		|| { printf 'build_report: R report generation failed\n' >&2; exit 1; }