Created
August 29, 2012 11:58
-
-
Save wyukawa/3511506 to your computer and use it in GitHub Desktop.
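Pig動作確認ログ (Apache Pig 0.10.0 operation check log: local-mode max-temperature example with DUMP, ILLUSTRATE, and EXPLAIN output)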
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| $ wget http://ftp.jaist.ac.jp/pub/apache/pig/pig-0.10.0/pig-0.10.0.tar.gz | |
| $ tar zxvf pig-0.10.0.tar.gz | |
| $ cd pig-0.10.0 | |
| $ export JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Home/ | |
| $ wget --no-check-certificate https://raw.github.com/tomwhite/hadoop-book/master/input/ncdc/micro-tab/sample.txt | |
| $ cat sample.txt | |
| 1950 0 1 | |
| 1950 22 1 | |
| 1950 -11 1 | |
| 1949 111 1 | |
| 1949 78 1 | |
| $ bin/pig -x local | |
| grunt> records = LOAD 'sample.txt' AS (year:chararray, temperature:int, quality:int); | |
| grunt> DUMP records; | |
| (1950,0,1) | |
| (1950,22,1) | |
| (1950,-11,1) | |
| (1949,111,1) | |
| (1949,78,1) | |
| grunt> filtered_records = FILTER records BY temperature != 9999 AND (quality == 0 OR quality == 1 OR quality == 4 OR quality == 5 OR quality == 9); | |
| grunt> DUMP filtered_records; | |
| (1950,0,1) | |
| (1950,22,1) | |
| (1950,-11,1) | |
| (1949,111,1) | |
| (1949,78,1) | |
| grunt> grouped_records = GROUP filtered_records BY year; | |
| grunt> DUMP grouped_records; | |
| (1949,{(1949,111,1),(1949,78,1)}) | |
| (1950,{(1950,0,1),(1950,22,1),(1950,-11,1)}) | |
| grunt> max_temp = FOREACH grouped_records GENERATE group, MAX(filtered_records.temperature); | |
| grunt> DUMP max_temp; | |
| (1949,111) | |
| (1950,22) | |
| grunt> ILLUSTRATE max_temp; | |
| ---------------------------------------------------------------------------- | |
| | records | year:chararray | temperature:int | quality:int | | |
| ---------------------------------------------------------------------------- | |
| | | 1949 | 78 | 1 | | |
| | | 1949 | 111 | 1 | | |
| | | 1949 | 9999 | 1 | | |
| ---------------------------------------------------------------------------- | |
| ------------------------------------------------------------------------------------- | |
| | filtered_records | year:chararray | temperature:int | quality:int | | |
| ------------------------------------------------------------------------------------- | |
| | | 1949 | 78 | 1 | | |
| | | 1949 | 111 | 1 | | |
| ------------------------------------------------------------------------------------- | |
| -------------------------------------------------------------------------------------------------------------------------------------------- | |
| | grouped_records | group:chararray | filtered_records:bag{:tuple(year:chararray,temperature:int,quality:int)} | | |
| -------------------------------------------------------------------------------------------------------------------------------------------- | |
| | | 1949 | {(1949, 78, 1), (1949, 111, 1)} | | |
| -------------------------------------------------------------------------------------------------------------------------------------------- | |
| ------------------------------------------------- | |
| | max_temp | group:chararray | :int | | |
| ------------------------------------------------- | |
| | | 1949 | 111 | | |
| ------------------------------------------------- | |
| grunt> EXPLAIN max_temp; | |
| #----------------------------------------------- | |
| # New Logical Plan: | |
| #----------------------------------------------- | |
| max_temp: (Name: LOStore Schema: group#352:chararray,#402:int) | |
| | | |
| |---max_temp: (Name: LOForEach Schema: group#352:chararray,#402:int) | |
| | | | |
| | (Name: LOGenerate[false,false] Schema: group#352:chararray,#402:int)ColumnPrune:InputUids=[352, 398]ColumnPrune:OutputUids=[352, 402] | |
| | | | | |
| | | group:(Name: Project Type: chararray Uid: 352 Input: 0 Column: (*)) | |
| | | | | |
| | | (Name: UserFunc(org.apache.pig.builtin.IntMax) Type: int Uid: 402) | |
| | | | | |
| | | |---(Name: Dereference Type: bag Uid: 401 Column:[1]) | |
| | | | | |
| | | |---filtered_records:(Name: Project Type: bag Uid: 398 Input: 1 Column: (*)) | |
| | | | |
| | |---(Name: LOInnerLoad[0] Schema: group#352:chararray) | |
| | | | |
| | |---filtered_records: (Name: LOInnerLoad[1] Schema: year#352:chararray,temperature#353:int,quality#354:int) | |
| | | |
| |---grouped_records: (Name: LOCogroup Schema: group#352:chararray,filtered_records#398:bag{#411:tuple(year#352:chararray,temperature#353:int,quality#354:int)}) | |
| | | | |
| | year:(Name: Project Type: chararray Uid: 352 Input: 0 Column: 0) | |
| | | |
| |---filtered_records: (Name: LOFilter Schema: year#352:chararray,temperature#353:int,quality#354:int) | |
| | | | |
| | (Name: And Type: boolean Uid: 410) | |
| | | | |
| | |---(Name: NotEqual Type: boolean Uid: 382) | |
| | | | | |
| | | |---temperature:(Name: Project Type: int Uid: 353 Input: 0 Column: 1) | |
| | | | | |
| | | |---(Name: Constant Type: int Uid: 381) | |
| | | | |
| | |---(Name: Or Type: boolean Uid: 396) | |
| | | | |
| | |---(Name: Or Type: boolean Uid: 393) | |
| | | | | |
| | | |---(Name: Or Type: boolean Uid: 390) | |
| | | | | | |
| | | | |---(Name: Or Type: boolean Uid: 387) | |
| | | | | | | |
| | | | | |---(Name: Equal Type: boolean Uid: 384) | |
| | | | | | | | |
| | | | | | |---quality:(Name: Project Type: int Uid: 354 Input: 0 Column: 2) | |
| | | | | | | | |
| | | | | | |---(Name: Constant Type: int Uid: 383) | |
| | | | | | | |
| | | | | |---(Name: Equal Type: boolean Uid: 386) | |
| | | | | | | |
| | | | | |---quality:(Name: Project Type: int Uid: 354 Input: 0 Column: 2) | |
| | | | | | | |
| | | | | |---(Name: Constant Type: int Uid: 385) | |
| | | | | | |
| | | | |---(Name: Equal Type: boolean Uid: 389) | |
| | | | | | |
| | | | |---quality:(Name: Project Type: int Uid: 354 Input: 0 Column: 2) | |
| | | | | | |
| | | | |---(Name: Constant Type: int Uid: 388) | |
| | | | | |
| | | |---(Name: Equal Type: boolean Uid: 392) | |
| | | | | |
| | | |---quality:(Name: Project Type: int Uid: 354 Input: 0 Column: 2) | |
| | | | | |
| | | |---(Name: Constant Type: int Uid: 391) | |
| | | | |
| | |---(Name: Equal Type: boolean Uid: 395) | |
| | | | |
| | |---quality:(Name: Project Type: int Uid: 354 Input: 0 Column: 2) | |
| | | | |
| | |---(Name: Constant Type: int Uid: 394) | |
| | | |
| |---records: (Name: LOForEach Schema: year#352:chararray,temperature#353:int,quality#354:int) | |
| | | | |
| | (Name: LOGenerate[false,false,false] Schema: year#352:chararray,temperature#353:int,quality#354:int)ColumnPrune:InputUids=[352, 353, 354]ColumnPrune:OutputUids=[352, 353, 354] | |
| | | | | |
| | | (Name: Cast Type: chararray Uid: 352) | |
| | | | | |
| | | |---year:(Name: Project Type: bytearray Uid: 352 Input: 0 Column: (*)) | |
| | | | | |
| | | (Name: Cast Type: int Uid: 353) | |
| | | | | |
| | | |---temperature:(Name: Project Type: bytearray Uid: 353 Input: 1 Column: (*)) | |
| | | | | |
| | | (Name: Cast Type: int Uid: 354) | |
| | | | | |
| | | |---quality:(Name: Project Type: bytearray Uid: 354 Input: 2 Column: (*)) | |
| | | | |
| | |---(Name: LOInnerLoad[0] Schema: year#352:bytearray) | |
| | | | |
| | |---(Name: LOInnerLoad[1] Schema: temperature#353:bytearray) | |
| | | | |
| | |---(Name: LOInnerLoad[2] Schema: quality#354:bytearray) | |
| | | |
| |---records: (Name: LOLoad Schema: year#352:bytearray,temperature#353:bytearray,quality#354:bytearray)RequiredFields:null | |
| #----------------------------------------------- | |
| # Physical Plan: | |
| #----------------------------------------------- | |
| max_temp: Store(fakefile:org.apache.pig.builtin.PigStorage) - scope-256 | |
| | | |
| |---max_temp: New For Each(false,false)[bag] - scope-255 | |
| | | | |
| | Project[chararray][0] - scope-249 | |
| | | | |
| | POUserFunc(org.apache.pig.builtin.IntMax)[int] - scope-253 | |
| | | | |
| | |---Project[bag][1] - scope-252 | |
| | | | |
| | |---Project[bag][1] - scope-251 | |
| | | |
| |---grouped_records: Package[tuple]{chararray} - scope-246 | |
| | | |
| |---grouped_records: Global Rearrange[tuple] - scope-245 | |
| | | |
| |---grouped_records: Local Rearrange[tuple]{chararray}(false) - scope-247 | |
| | | | |
| | Project[chararray][0] - scope-248 | |
| | | |
| |---filtered_records: Filter[bag] - scope-221 | |
| | | | |
| | And[boolean] - scope-244 | |
| | | | |
| | |---Not Equal To[boolean] - scope-224 | |
| | | | | |
| | | |---Project[int][1] - scope-222 | |
| | | | | |
| | | |---Constant(9999) - scope-223 | |
| | | | |
| | |---Or[boolean] - scope-243 | |
| | | | |
| | |---Or[boolean] - scope-239 | |
| | | | | |
| | | |---Or[boolean] - scope-235 | |
| | | | | | |
| | | | |---Or[boolean] - scope-231 | |
| | | | | | | |
| | | | | |---Equal To[boolean] - scope-227 | |
| | | | | | | | |
| | | | | | |---Project[int][2] - scope-225 | |
| | | | | | | | |
| | | | | | |---Constant(0) - scope-226 | |
| | | | | | | |
| | | | | |---Equal To[boolean] - scope-230 | |
| | | | | | | |
| | | | | |---Project[int][2] - scope-228 | |
| | | | | | | |
| | | | | |---Constant(1) - scope-229 | |
| | | | | | |
| | | | |---Equal To[boolean] - scope-234 | |
| | | | | | |
| | | | |---Project[int][2] - scope-232 | |
| | | | | | |
| | | | |---Constant(4) - scope-233 | |
| | | | | |
| | | |---Equal To[boolean] - scope-238 | |
| | | | | |
| | | |---Project[int][2] - scope-236 | |
| | | | | |
| | | |---Constant(5) - scope-237 | |
| | | | |
| | |---Equal To[boolean] - scope-242 | |
| | | | |
| | |---Project[int][2] - scope-240 | |
| | | | |
| | |---Constant(9) - scope-241 | |
| | | |
| |---records: New For Each(false,false,false)[bag] - scope-220 | |
| | | | |
| | Cast[chararray] - scope-212 | |
| | | | |
| | |---Project[bytearray][0] - scope-211 | |
| | | | |
| | Cast[int] - scope-215 | |
| | | | |
| | |---Project[bytearray][1] - scope-214 | |
| | | | |
| | Cast[int] - scope-218 | |
| | | | |
| | |---Project[bytearray][2] - scope-217 | |
| | | |
| |---records: Load(file:///Users/wyukawa/pig-work/pig-0.10.0/sample.txt:org.apache.pig.builtin.PigStorage) - scope-210 | |
| 2012-08-29 20:55:22,432 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler - File concatenation threshold: 100 optimistic? false | |
| 2012-08-29 20:55:22,432 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.CombinerOptimizer - Choosing to move algebraic foreach to combiner | |
| 2012-08-29 20:55:22,435 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size before optimization: 1 | |
| 2012-08-29 20:55:22,435 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer - MR plan size after optimization: 1 | |
| #-------------------------------------------------- | |
| # Map Reduce Plan | |
| #-------------------------------------------------- | |
| MapReduce node scope-257 | |
| Map Plan | |
| grouped_records: Local Rearrange[tuple]{chararray}(false) - scope-270 | |
| | | | |
| | Project[chararray][0] - scope-271 | |
| | | |
| |---max_temp: New For Each(false,false)[bag] - scope-258 | |
| | | | |
| | Project[chararray][0] - scope-259 | |
| | | | |
| | POUserFunc(org.apache.pig.builtin.IntMax$Initial)[tuple] - scope-260 | |
| | | | |
| | |---Project[bag][1] - scope-261 | |
| | | | |
| | |---Project[bag][1] - scope-262 | |
| | | |
| |---Pre Combiner Local Rearrange[tuple]{Unknown} - scope-272 | |
| | | |
| |---filtered_records: Filter[bag] - scope-221 | |
| | | | |
| | And[boolean] - scope-244 | |
| | | | |
| | |---Not Equal To[boolean] - scope-224 | |
| | | | | |
| | | |---Project[int][1] - scope-222 | |
| | | | | |
| | | |---Constant(9999) - scope-223 | |
| | | | |
| | |---Or[boolean] - scope-243 | |
| | | | |
| | |---Or[boolean] - scope-239 | |
| | | | | |
| | | |---Or[boolean] - scope-235 | |
| | | | | | |
| | | | |---Or[boolean] - scope-231 | |
| | | | | | | |
| | | | | |---Equal To[boolean] - scope-227 | |
| | | | | | | | |
| | | | | | |---Project[int][2] - scope-225 | |
| | | | | | | | |
| | | | | | |---Constant(0) - scope-226 | |
| | | | | | | |
| | | | | |---Equal To[boolean] - scope-230 | |
| | | | | | | |
| | | | | |---Project[int][2] - scope-228 | |
| | | | | | | |
| | | | | |---Constant(1) - scope-229 | |
| | | | | | |
| | | | |---Equal To[boolean] - scope-234 | |
| | | | | | |
| | | | |---Project[int][2] - scope-232 | |
| | | | | | |
| | | | |---Constant(4) - scope-233 | |
| | | | | |
| | | |---Equal To[boolean] - scope-238 | |
| | | | | |
| | | |---Project[int][2] - scope-236 | |
| | | | | |
| | | |---Constant(5) - scope-237 | |
| | | | |
| | |---Equal To[boolean] - scope-242 | |
| | | | |
| | |---Project[int][2] - scope-240 | |
| | | | |
| | |---Constant(9) - scope-241 | |
| | | |
| |---records: New For Each(false,false,false)[bag] - scope-220 | |
| | | | |
| | Cast[chararray] - scope-212 | |
| | | | |
| | |---Project[bytearray][0] - scope-211 | |
| | | | |
| | Cast[int] - scope-215 | |
| | | | |
| | |---Project[bytearray][1] - scope-214 | |
| | | | |
| | Cast[int] - scope-218 | |
| | | | |
| | |---Project[bytearray][2] - scope-217 | |
| | | |
| |---records: Load(file:///Users/wyukawa/pig-work/pig-0.10.0/sample.txt:org.apache.pig.builtin.PigStorage) - scope-210-------- | |
| Combine Plan | |
| grouped_records: Local Rearrange[tuple]{chararray}(false) - scope-274 | |
| | | | |
| | Project[chararray][0] - scope-275 | |
| | | |
| |---max_temp: New For Each(false,false)[bag] - scope-263 | |
| | | | |
| | Project[chararray][0] - scope-264 | |
| | | | |
| | POUserFunc(org.apache.pig.builtin.IntMax$Intermediate)[tuple] - scope-265 | |
| | | | |
| | |---Project[bag][1] - scope-266 | |
| | | |
| |---POCombinerPackage[tuple]{chararray} - scope-268-------- | |
| Reduce Plan | |
| max_temp: Store(fakefile:org.apache.pig.builtin.PigStorage) - scope-256 | |
| | | |
| |---max_temp: New For Each(false,false)[bag] - scope-255 | |
| | | | |
| | Project[chararray][0] - scope-249 | |
| | | | |
| | POUserFunc(org.apache.pig.builtin.IntMax$Final)[int] - scope-253 | |
| | | | |
| | |---Project[bag][1] - scope-267 | |
| | | |
| |---POCombinerPackage[tuple]{chararray} - scope-276-------- | |
| Global sort: false | |
| ---------------- | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment