Created
July 11, 2011 14:41
-
-
Save neilkod/1075989 to your computer and use it in GitHub Desktop.
attempt at pig style
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Read the HBase table 'user_info_helix'.
-- Three column specs are requested (alias:helix, profile:dw.last_sess_dt and
-- the whole 'alias' family as a map); with the '-loadKey' option the row key
-- is returned as an extra leading field (per the HBaseStorage documentation),
-- which is why four fields are declared below.
raw = LOAD 'hbase://user_info_helix'
      USING org.apache.pig.backend.hadoop.hbase.HBaseStorage(
          'alias:helix profile:dw.last_sess_dt alias:*',
          '-loadKey')
      AS (
          reg_method:chararray,
          helix_id:chararray,
          last_sess_dt:chararray,
          alias_map:map[]
      );
-- Carry the three scalar fields through unchanged and unnest the alias map.
-- mapToBag is a UDF that is not defined in this script; presumably it turns
-- alias_map into a bag of (key, value) tuples -- TODO confirm. FLATTEN then
-- emits one output row per tuple, named (dynamic_reg, session_time).
flattened = FOREACH raw GENERATE
    reg_method,
    helix_id,
    last_sess_dt,
    FLATTEN(mapToBag(alias_map)) AS (dynamic_reg:chararray, session_time:chararray);
-- Keep only rows that have a last-session date and whose flattened alias key
-- matches the 'UREG-.*' or 'ANON-.*' pattern.
lmtd = FILTER flattened BY
    last_sess_dt IS NOT NULL
    AND (dynamic_reg MATCHES 'UREG-.*' OR dynamic_reg MATCHES 'ANON-.*');
-- Narrow the filtered rows to the three fields used downstream;
-- last_sess_dt and session_time are dropped here.
raw_data = FOREACH lmtd GENERATE
    reg_method,
    helix_id,
    dynamic_reg;
-- Partition raw_data into three relations by reg_method pattern and by whether
-- a helix_id is present. Rows that satisfy none of the conditions (e.g. a
-- 'UREG-' row with a null helix_id) are not written to any output relation.
SPLIT raw_data INTO
    ureg               IF reg_method MATCHES 'UREG-.*' AND helix_id IS NOT NULL,
    anon               IF reg_method MATCHES 'ANON-.*' AND helix_id IS NULL,
    anon_with_helix_id IF reg_method MATCHES 'ANON-.*' AND helix_id IS NOT NULL;
-- Build the rows to write back: two fields taken from the 'ureg_with_helix'
-- side of 'joind', plus a map produced by the tupleToMap UDF from the key
-- 'HELIX-<helix_id>' and the run date.
-- NOTE(review): neither the relation 'joind' nor the alias 'ureg_with_helix'
-- is defined anywhere in the visible script -- a JOIN statement appears to be
-- missing before this point; confirm before running.
-- NOTE(review): $run_date is filled in by parameter substitution; if the value
-- is not a bare numeric literal it probably needs quoting -- confirm.
forward = FOREACH joind GENERATE
    ureg_with_helix::anon,
    ureg_with_helix::helix_id,
    tupleToMap(CONCAT('HELIX-', (chararray) ureg_with_helix::helix_id), $run_date);
-- Write 'forward' back into the same HBase table it was derived from,
-- using the column spec 'alias:helix alias:'.
STORE forward
    INTO 'hbase://user_info_helix'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('alias:helix alias:');
-- Anonymous rows that have a last-session date but no helix id yet.
-- The relation 'raw' declares its first field as 'reg_method'; it has no field
-- called 'reg', so the original reference to 'reg' could not be resolved
-- against the schema. 'reg_method' is used here, matching the SPLIT on raw_data.
anon_users = FILTER raw BY
    reg_method MATCHES 'ANON-.*'
    AND helix_id IS NULL
    AND last_sess_dt IS NOT NULL;

-- Give each of those rows a helix id computed from reg_method by the HelixHash
-- UDF (not defined in this script), and expose reg_method under the name 'anon'.
new_anon_with_helix_id = FOREACH anon_users GENERATE
    reg_method AS anon,
    (chararray) HelixHash(reg_method) AS helix_id;
-- Shape the newly assigned helix ids for writing: the anon value, the helix id,
-- and a map produced by the tupleToMap UDF from the key 'HELIX-<helix_id>' and
-- the run date.
-- NOTE(review): $run_date is filled in by parameter substitution; if the value
-- is not a bare numeric literal it probably needs quoting -- confirm.
to_store = FOREACH new_anon_with_helix_id GENERATE
    anon,
    helix_id,
    tupleToMap(CONCAT('HELIX-', (chararray) helix_id), $run_date);
-- Write 'to_store' into the HBase table 'user_info_helix',
-- using the column spec 'alias:helix alias:'.
STORE to_store
    INTO 'hbase://user_info_helix'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('alias:helix alias:');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment