Created
October 30, 2014 14:10
-
-
Save michael-erasmus/935c652b281930c07805 to your computer and use it in GitHub Desktop.
transform_actions_taken.pig
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- UDF modules used by this script. Two separate files are registered:
-- one runs under Jython (namespace actions_taken), the other under
-- streaming Python (namespace actions_taken1).
REGISTER '../udfs/jython/actions_taken.py'
    USING jython AS actions_taken;
REGISTER '../udfs/python/actions_taken.py'
    USING streaming_python AS actions_taken1;
-- Load the input records with PigStorage's default delimiter.
-- (Path name suggests this is the output of an earlier "extract" step.)
-- All scalar columns are read as chararray; `value` is a bag of
-- schema-less tuples that the scopes UDF consumes later.
raw = LOAD '$OUTPUT_PATH/extract-actions-taken' USING PigStorage() AS (
    user_id:chararray,
    visitor_id:chararray,
    client_id:chararray,
    last_modified:chararray,
    user_joined_at:chararray,
    date:chararray,
    value:bag{t:tuple()},
    extra_data:chararray
);
-- Run the date-like columns through the streaming-Python UDFs and derive
-- `scopes` from `value` via the Jython UDF.
--
-- Every UDF result carries an explicit alias. The statement that consumes
-- this relation projects `date` and `user_joined_at` by name; without the
-- aliases those names would only resolve if the UDFs' declared output
-- schemas happened to use exactly the same field names.
with_scopes = FOREACH raw GENERATE
    user_id,
    visitor_id,
    client_id,
    actions_taken1.transform_date(last_modified)        AS last_modified,
    actions_taken1.transform_date(date)                 AS date,
    actions_taken1.transform_joined_at(user_joined_at)  AS user_joined_at,
    actions_taken.pull_out_scopes(value)                AS scopes,
    extra_data;
-- Expand positions $0..$9 of `scopes` into the columns scope1..scope10,
-- passing the identifying and date columns plus extra_data through as-is.
-- NOTE(review): the shape of `scopes` is set by pull_out_scopes, which is
-- not visible here; confirm it always provides ten positions.
transformed = FOREACH with_scopes GENERATE
    user_id, visitor_id, client_id,
    last_modified, user_joined_at, date,
    FLATTEN(scopes.$0) AS scope1,
    FLATTEN(scopes.$1) AS scope2,
    FLATTEN(scopes.$2) AS scope3,
    FLATTEN(scopes.$3) AS scope4,
    FLATTEN(scopes.$4) AS scope5,
    FLATTEN(scopes.$5) AS scope6,
    FLATTEN(scopes.$6) AS scope7,
    FLATTEN(scopes.$7) AS scope8,
    FLATTEN(scopes.$8) AS scope9,
    FLATTEN(scopes.$9) AS scope10,
    extra_data;
-- Write the result gzip-compressed.
SET output.compression.enabled true;
SET output.compression.codec org.apache.hadoop.io.compress.GzipCodec;

-- Clear any output left by a previous run before storing
-- (rmf, unlike rm, does not complain when the path is absent).
rmf $OUTPUT_PATH/transform-actions-taken;

STORE transformed
    INTO '$OUTPUT_PATH/transform-actions-taken'
    USING PigStorage();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment