Created
May 5, 2016 06:30
-
-
Save NitinKumar94/fbca5d56caa6c150eaa4c8528a63252c to your computer and use it in GitHub Desktop.
Aggregation query for benchmarking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
------------Query for basic aggregation for performance benchmarking--------------- | |
set hive.tez.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; | |
set hive.tez.container.size=${hiveconf:CONTAINER_SIZE}; | |
set hive.tez.cpu.vcores=${hiveconf:VCORES}; | |
--set tez.grouping.split-count=15; | |
use benchmarking; | |
--explain | |
create table ${hiveconf:TARGET_TABLE_NAME} as | |
select | |
b.subscriber_id as subscriber_id, | |
hour(b.time_stamp) as hour_of_day, | |
max(offnet_upload_volume) as max_offnet_upload, | |
max(offnet_download_volume) as max_offnet_download, | |
min(onnet_upload_volume) as min_onnet_upload, | |
min(onnet_download_volume) as min_onnet_download, | |
count(*) as session_count, | |
sum(b.upload_data_volume) as total_upload, | |
sum(b.download_data_volume) as total_download, | |
sum(b.upload_data_volume) + sum(download_data_volume) as total_data_volume, | |
sum(b.value) as total_value, | |
avg(b.upload_data_volume) as avg_upload_volume, | |
avg(b.download_data_volume) as avg_download_volume, | |
avg(b.value) as avg_value, | |
sum(b.download_data_volume)/sum(b.upload_data_volume) as usage_ratio, | |
sum(b.upload_data_volume)/(sum(b.upload_data_volume) + sum(b.download_data_volume)) * 100.0 as upload_data_percentage, | |
sum(b.download_data_volume)/(sum(b.upload_data_volume) + sum(b.download_data_volume)) * 100.0 as download_data_percentage, | |
sum(b.offnet_upload_volume) as offnet_upload_volume, | |
sum(b.offnet_download_volume) as offnet_download_volume, | |
sum(b.offnet_value) as offnet_value, | |
sum(b.onnet_upload_volume) as onnet_upload_volume, | |
sum(b.onnet_download_volume) as onnet_download_volume, | |
sum(b.onnet_value) as onnet_value, | |
sum(b.offline_upload_volume) as offline_upload_volume, | |
sum(b.offline_download_volume) as offline_download_volume, | |
sum(b.offline_value) as offline_value, | |
variance(b.upload_data_volume) as upload_variance, | |
variance(b.download_data_volume) as download_variance, | |
variance(b.value) as value_variance | |
from | |
( | |
select | |
subscriber_id, | |
upload_data_volume, | |
download_data_volume, | |
value, | |
time_stamp, | |
cell_site, | |
latitude, | |
longitude, | |
circle, | |
rate_group, | |
disconnect_reason, | |
if(rate_group = 'offnet',upload_data_volume,0) as offnet_upload_volume, | |
if(rate_group = 'offnet',download_data_volume,0) as offnet_download_volume, | |
if(rate_group = 'offnet',value,0) as offnet_value, | |
if(rate_group = 'onnet',upload_data_volume,0) as onnet_upload_volume, | |
if(rate_group = 'onnet',download_data_volume,0) as onnet_download_volume, | |
if(rate_group = 'onnet',value,0) as onnet_value, | |
if(rate_group = 'offline',upload_data_volume,0) as offline_upload_volume, | |
if(rate_group = 'offline',download_data_volume,0) as offline_download_volume, | |
if(rate_group = 'offline',value,0) as offline_value | |
from | |
${hiveconf:SOURCE_TABLE_NAME} | |
) b | |
group by | |
b.subscriber_id, hour(b.time_stamp); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment