Created
July 4, 2012 15:22
-
-
Save rubensayshi/3047871 to your computer and use it in GitHub Desktop.
Hive MAPJOIN + LATERAL VIEW - explains
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| SELECT | |
| date_int, | |
| gender, | |
| part, | |
| COUNT( DISTINCT member_id ), | |
| SUM( partcount ) | |
| FROM ( | |
| SELECT | |
| /*+ MAPJOIN(members_map) */ | |
| date_int, | |
| visit_stats.member_id, | |
| members_map.gender as gender, | |
| parts | |
| FROM visit_stats | |
| JOIN members_map ON(members_map.member_id = visit_stats.member_id) | |
| ) visit_stats | |
| LATERAL VIEW explode(parts) partsTable AS part, partcount | |
| GROUP BY date_int, gender, part | |
| OK | |
| ABSTRACT SYNTAX TREE: | |
| (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_TABLE_OR_COL parts)) part partcount (TOK_TABALIAS partsTable))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME visit_stats)) (TOK_TABREF (TOK_TABNAME members_map)) (= (. (TOK_TABLE_OR_COL members_map) member_id) (. (TOK_TABLE_OR_COL visit_stats) member_id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST members_map))) (TOK_SELEXPR (TOK_TABLE_OR_COL date_int)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL visit_stats) member_id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL members_map) gender) gender) (TOK_SELEXPR (TOK_TABLE_OR_COL parts))))) visit_stats))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL date_int)) (TOK_SELEXPR (TOK_TABLE_OR_COL gender)) (TOK_SELEXPR (TOK_TABLE_OR_COL part)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL member_id))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL partcount)))) (TOK_GROUPBY (TOK_TABLE_OR_COL date_int) (TOK_TABLE_OR_COL gender) (TOK_TABLE_OR_COL part)))) | |
| STAGE DEPENDENCIES: | |
| Stage-4 is a root stage | |
| Stage-1 depends on stages: Stage-4 | |
| Stage-0 is a root stage | |
| STAGE PLANS: | |
| Stage: Stage-4 | |
| Map Reduce Local Work | |
| Alias -> Map Local Tables: | |
| visit_stats:members_map | |
| Fetch Operator | |
| limit: -1 | |
| Alias -> Map Local Operator Tree: | |
| visit_stats:members_map | |
| TableScan | |
| alias: members_map | |
| HashTable Sink Operator | |
| condition expressions: | |
| 0 {date_int} {member_id} {parts} | |
| 1 {gender} | |
| handleSkewJoin: false | |
| keys: | |
| 0 [Column[member_id]] | |
| 1 [Column[member_id]] | |
| Position of Big Table: 0 | |
| Stage: Stage-1 | |
| Map Reduce | |
| Alias -> Map Operator Tree: | |
| visit_stats:visit_stats | |
| TableScan | |
| alias: visit_stats | |
| Map Join Operator | |
| condition map: | |
| Inner Join 0 to 1 | |
| condition expressions: | |
| 0 {date_int} {member_id} {parts} | |
| 1 {gender} | |
| handleSkewJoin: false | |
| keys: | |
| 0 [Column[member_id]] | |
| 1 [Column[member_id]] | |
| outputColumnNames: _col0, _col1, _col2, _col6 | |
| Position of Big Table: 0 | |
| File Output Operator | |
| compressed: false | |
| GlobalTableId: 0 | |
| table: | |
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat | |
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat | |
| Local Work: | |
| Map Reduce Local Work | |
| Stage: Stage-0 | |
| Fetch Operator | |
| limit: -1 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| SELECT | |
| date_int, | |
| gender, | |
| COUNT( DISTINCT member_id ) | |
| FROM ( | |
| SELECT | |
| /*+ MAPJOIN(members_map) */ | |
| date_int, | |
| visit_stats.member_id, | |
| members_map.gender as gender, | |
| parts | |
| FROM visit_stats | |
| JOIN members_map ON(members_map.member_id = visit_stats.member_id) | |
| ) visit_stats | |
| GROUP BY date_int, gender | |
| ABSTRACT SYNTAX TREE: | |
| (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME visit_stats)) (TOK_TABREF (TOK_TABNAME members_map)) (= (. (TOK_TABLE_OR_COL members_map) member_id) (. (TOK_TABLE_OR_COL visit_stats) member_id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST members_map))) (TOK_SELEXPR (TOK_TABLE_OR_COL date_int)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL visit_stats) member_id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL members_map) gender) gender) (TOK_SELEXPR (TOK_TABLE_OR_COL parts))))) visit_stats)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL date_int)) (TOK_SELEXPR (TOK_TABLE_OR_COL gender)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL member_id)))) (TOK_GROUPBY (TOK_TABLE_OR_COL date_int) (TOK_TABLE_OR_COL gender)))) | |
| STAGE DEPENDENCIES: | |
| Stage-4 is a root stage | |
| Stage-1 depends on stages: Stage-4 | |
| Stage-2 depends on stages: Stage-1 | |
| Stage-0 is a root stage | |
| STAGE PLANS: | |
| Stage: Stage-4 | |
| Map Reduce Local Work | |
| Alias -> Map Local Tables: | |
| visit_stats:members_map | |
| Fetch Operator | |
| limit: -1 | |
| Alias -> Map Local Operator Tree: | |
| visit_stats:members_map | |
| TableScan | |
| alias: members_map | |
| HashTable Sink Operator | |
| condition expressions: | |
| 0 {date_int} {member_id} | |
| 1 {gender} | |
| handleSkewJoin: false | |
| keys: | |
| 0 [Column[member_id]] | |
| 1 [Column[member_id]] | |
| Position of Big Table: 0 | |
| Stage: Stage-1 | |
| Map Reduce | |
| Alias -> Map Operator Tree: | |
| visit_stats:visit_stats | |
| TableScan | |
| alias: visit_stats | |
| Map Join Operator | |
| condition map: | |
| Inner Join 0 to 1 | |
| condition expressions: | |
| 0 {date_int} {member_id} | |
| 1 {gender} | |
| handleSkewJoin: false | |
| keys: | |
| 0 [Column[member_id]] | |
| 1 [Column[member_id]] | |
| outputColumnNames: _col0, _col1, _col6 | |
| Position of Big Table: 0 | |
| File Output Operator | |
| compressed: false | |
| GlobalTableId: 0 | |
| table: | |
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat | |
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat | |
| Local Work: | |
| Map Reduce Local Work | |
| Stage: Stage-2 | |
| Map Reduce | |
| Alias -> Map Operator Tree: | |
| hdfs://localhost:54310/tmp/hive-hduser/hive_2012-07-04_17-22-21_726_3200987223179532031/-mr-10002 | |
| Select Operator | |
| expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| expr: _col6 | |
| type: int | |
| outputColumnNames: _col0, _col1, _col6 | |
| Select Operator | |
| expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| expr: _col6 | |
| type: int | |
| outputColumnNames: _col0, _col1, _col2 | |
| Select Operator | |
| expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col2 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| outputColumnNames: _col0, _col2, _col1 | |
| Group By Operator | |
| aggregations: | |
| expr: count(DISTINCT _col1) | |
| bucketGroup: false | |
| keys: | |
| expr: _col0 | |
| type: int | |
| expr: _col2 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| mode: hash | |
| outputColumnNames: _col0, _col1, _col2, _col3 | |
| Reduce Output Operator | |
| key expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| expr: _col2 | |
| type: int | |
| sort order: +++ | |
| Map-reduce partition columns: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| tag: -1 | |
| value expressions: | |
| expr: _col3 | |
| type: bigint | |
| Reduce Operator Tree: | |
| Group By Operator | |
| aggregations: | |
| expr: count(DISTINCT KEY._col2:0._col0) | |
| bucketGroup: false | |
| keys: | |
| expr: KEY._col0 | |
| type: int | |
| expr: KEY._col1 | |
| type: int | |
| mode: mergepartial | |
| outputColumnNames: _col0, _col1, _col2 | |
| Select Operator | |
| expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| expr: _col2 | |
| type: bigint | |
| outputColumnNames: _col0, _col1, _col2 | |
| File Output Operator | |
| compressed: false | |
| GlobalTableId: 0 | |
| table: | |
| input format: org.apache.hadoop.mapred.TextInputFormat | |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | |
| Stage: Stage-0 | |
| Fetch Operator | |
| limit: -1 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| SELECT | |
| date_int, | |
| gender, | |
| part, | |
| COUNT( DISTINCT member_id ), | |
| SUM( partcount ) | |
| FROM ( | |
| SELECT | |
| date_int, | |
| visit_stats.member_id, | |
| members_map.gender as gender, | |
| parts | |
| FROM visit_stats | |
| JOIN members_map ON(members_map.member_id = visit_stats.member_id) | |
| ) visit_stats | |
| LATERAL VIEW explode(parts) partsTable AS part, partcount | |
| GROUP BY date_int, gender, part | |
| ABSTRACT SYNTAX TREE: | |
| (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_TABLE_OR_COL parts)) part partcount (TOK_TABALIAS partsTable))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME visit_stats)) (TOK_TABREF (TOK_TABNAME members_map)) (= (. (TOK_TABLE_OR_COL members_map) member_id) (. (TOK_TABLE_OR_COL visit_stats) member_id)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL date_int)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL visit_stats) member_id)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL members_map) gender) gender) (TOK_SELEXPR (TOK_TABLE_OR_COL parts))))) visit_stats))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL date_int)) (TOK_SELEXPR (TOK_TABLE_OR_COL gender)) (TOK_SELEXPR (TOK_TABLE_OR_COL part)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL member_id))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_TABLE_OR_COL partcount)))) (TOK_GROUPBY (TOK_TABLE_OR_COL date_int) (TOK_TABLE_OR_COL gender) (TOK_TABLE_OR_COL part)))) | |
| STAGE DEPENDENCIES: | |
| Stage-1 is a root stage | |
| Stage-2 depends on stages: Stage-1 | |
| Stage-0 is a root stage | |
| STAGE PLANS: | |
| Stage: Stage-1 | |
| Map Reduce | |
| Alias -> Map Operator Tree: | |
| visit_stats:members_map | |
| TableScan | |
| alias: members_map | |
| Reduce Output Operator | |
| key expressions: | |
| expr: member_id | |
| type: int | |
| sort order: + | |
| Map-reduce partition columns: | |
| expr: member_id | |
| type: int | |
| tag: 1 | |
| value expressions: | |
| expr: gender | |
| type: int | |
| visit_stats:visit_stats | |
| TableScan | |
| alias: visit_stats | |
| Reduce Output Operator | |
| key expressions: | |
| expr: member_id | |
| type: int | |
| sort order: + | |
| Map-reduce partition columns: | |
| expr: member_id | |
| type: int | |
| tag: 0 | |
| value expressions: | |
| expr: date_int | |
| type: int | |
| expr: member_id | |
| type: int | |
| expr: parts | |
| type: map<string,int> | |
| Reduce Operator Tree: | |
| Join Operator | |
| condition map: | |
| Inner Join 0 to 1 | |
| condition expressions: | |
| 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} | |
| 1 {VALUE._col1} | |
| handleSkewJoin: false | |
| outputColumnNames: _col0, _col1, _col2, _col6 | |
| Select Operator | |
| expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| expr: _col6 | |
| type: int | |
| expr: _col2 | |
| type: map<string,int> | |
| outputColumnNames: _col0, _col1, _col2, _col3 | |
| Lateral View Forward | |
| Select Operator | |
| SELECT * : (no compute) | |
| Lateral View Join Operator | |
| outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 | |
| Select Operator | |
| expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col2 | |
| type: int | |
| expr: _col4 | |
| type: string | |
| expr: _col1 | |
| type: int | |
| expr: _col5 | |
| type: int | |
| outputColumnNames: _col0, _col2, _col4, _col1, _col5 | |
| Group By Operator | |
| aggregations: | |
| expr: count(DISTINCT _col1) | |
| expr: sum(_col5) | |
| bucketGroup: false | |
| keys: | |
| expr: _col0 | |
| type: int | |
| expr: _col2 | |
| type: int | |
| expr: _col4 | |
| type: string | |
| expr: _col1 | |
| type: int | |
| mode: hash | |
| outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 | |
| File Output Operator | |
| compressed: false | |
| GlobalTableId: 0 | |
| table: | |
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat | |
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat | |
| Select Operator | |
| expressions: | |
| expr: _col3 | |
| type: map<string,int> | |
| outputColumnNames: _col0 | |
| UDTF Operator | |
| function name: explode | |
| Lateral View Join Operator | |
| outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 | |
| Select Operator | |
| expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col2 | |
| type: int | |
| expr: _col4 | |
| type: string | |
| expr: _col1 | |
| type: int | |
| expr: _col5 | |
| type: int | |
| outputColumnNames: _col0, _col2, _col4, _col1, _col5 | |
| Group By Operator | |
| aggregations: | |
| expr: count(DISTINCT _col1) | |
| expr: sum(_col5) | |
| bucketGroup: false | |
| keys: | |
| expr: _col0 | |
| type: int | |
| expr: _col2 | |
| type: int | |
| expr: _col4 | |
| type: string | |
| expr: _col1 | |
| type: int | |
| mode: hash | |
| outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 | |
| File Output Operator | |
| compressed: false | |
| GlobalTableId: 0 | |
| table: | |
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat | |
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat | |
| Stage: Stage-2 | |
| Map Reduce | |
| Alias -> Map Operator Tree: | |
| hdfs://localhost:54310/tmp/hive-hduser/hive_2012-07-04_17-21-43_958_7050465118729026888/-mr-10002 | |
| Reduce Output Operator | |
| key expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| expr: _col2 | |
| type: string | |
| expr: _col3 | |
| type: int | |
| sort order: ++++ | |
| Map-reduce partition columns: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| expr: _col2 | |
| type: string | |
| tag: -1 | |
| value expressions: | |
| expr: _col4 | |
| type: bigint | |
| expr: _col5 | |
| type: bigint | |
| Reduce Operator Tree: | |
| Group By Operator | |
| aggregations: | |
| expr: count(DISTINCT KEY._col3:0._col0) | |
| expr: sum(VALUE._col1) | |
| bucketGroup: false | |
| keys: | |
| expr: KEY._col0 | |
| type: int | |
| expr: KEY._col1 | |
| type: int | |
| expr: KEY._col2 | |
| type: string | |
| mode: mergepartial | |
| outputColumnNames: _col0, _col1, _col2, _col3, _col4 | |
| Select Operator | |
| expressions: | |
| expr: _col0 | |
| type: int | |
| expr: _col1 | |
| type: int | |
| expr: _col2 | |
| type: string | |
| expr: _col3 | |
| type: bigint | |
| expr: _col4 | |
| type: bigint | |
| outputColumnNames: _col0, _col1, _col2, _col3, _col4 | |
| File Output Operator | |
| compressed: false | |
| GlobalTableId: 0 | |
| table: | |
| input format: org.apache.hadoop.mapred.TextInputFormat | |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | |
| Stage: Stage-0 | |
| Fetch Operator | |
| limit: -1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment