Skip to content

Instantly share code, notes, and snippets.

@neilkod
Created July 11, 2011 14:41
Show Gist options
  • Save neilkod/1075989 to your computer and use it in GitHub Desktop.
Save neilkod/1075989 to your computer and use it in GitHub Desktop.
attempt at pig style
-- Read the user_info_helix HBase table.
--   * '-loadKey' makes HBaseStorage return the row key as the first field
--     of every tuple; it is bound to reg_method here.
--   * alias:helix and profile:dw.last_sess_dt are read as single columns.
--   * alias:* pulls every column of the `alias` family into one map.
raw = LOAD 'hbase://user_info_helix'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage(
        'alias:helix profile:dw.last_sess_dt alias:*',
        '-loadKey'
    )
    AS (
        reg_method:chararray,
        helix_id:chararray,
        last_sess_dt:chararray,
        alias_map:map[]
    );
-- Expand alias_map so each tuple produced by mapToBag becomes its own row,
-- carrying reg_method, helix_id and last_sess_dt along unchanged.
-- NOTE(review): mapToBag is a UDF not defined in the visible script;
-- presumably it emits one (key, value) tuple per map entry -- confirm.
flattened = FOREACH raw GENERATE
    reg_method,
    helix_id,
    last_sess_dt,
    FLATTEN(mapToBag(alias_map)) AS (dynamic_reg:chararray, session_time:chararray);
-- Keep only rows that have a last-session date and whose flattened alias
-- key starts with 'UREG-' or 'ANON-'.
lmtd = FILTER flattened BY
    last_sess_dt IS NOT NULL
    AND (dynamic_reg MATCHES 'UREG-.*' OR dynamic_reg MATCHES 'ANON-.*');

-- Project down to the three fields the SPLIT below works with.
raw_data = FOREACH lmtd GENERATE
    reg_method,
    helix_id,
    dynamic_reg;
-- Partition raw_data by the row-key prefix and by whether a helix_id exists:
--   ureg               : 'UREG-' key with a helix_id
--   anon               : 'ANON-' key without a helix_id
--   anon_with_helix_id : 'ANON-' key with a helix_id
-- Rows matching none of the conditions (e.g. a 'UREG-' key with a null
-- helix_id) are not routed to any output.
SPLIT raw_data INTO
    ureg               IF reg_method MATCHES 'UREG-.*' AND helix_id IS NOT NULL,
    anon               IF reg_method MATCHES 'ANON-.*' AND helix_id IS NULL,
    anon_with_helix_id IF reg_method MATCHES 'ANON-.*' AND helix_id IS NOT NULL;
-- Build the tuples written back to HBase: for each row of `joind`, emit the
-- anon key, the helix_id, and a map produced by tupleToMap from the string
-- 'HELIX-<helix_id>' and the $run_date parameter.
-- NOTE(review): neither the relation `joind` nor the `ureg_with_helix`
-- prefix is defined anywhere in the visible script (the SPLIT above yields
-- ureg / anon / anon_with_helix_id). A JOIN statement appears to be missing;
-- this statement cannot run until it is supplied.
-- NOTE(review): tupleToMap is a UDF not defined in the visible script;
-- presumably it builds a single-entry map from (key, value) -- confirm.
-- NOTE(review): $run_date is replaced textually by Pig parameter
-- substitution before parsing, so it must expand to a valid Pig expression
-- (e.g. a quoted string) -- confirm how the parameter is passed.
forward = FOREACH joind
GENERATE ureg_with_helix::anon
, ureg_with_helix::helix_id
, tupleToMap(CONCAT('HELIX-', (chararray)ureg_with_helix::helix_id), $run_date);
-- Write back to the same table. HBaseStorage uses the first field of each
-- tuple as the row key; the remaining fields are mapped, in order, to
-- alias:helix and to the `alias` column family (map field).
STORE forward INTO 'hbase://user_info_helix'
USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('alias:helix alias:');
-- Select anonymous users that have a last-session date but no helix_id yet.
-- The row key is exposed by the LOAD of `raw` as reg_method (there is no
-- field named `reg` in that schema), so the 'ANON-' prefix test and the
-- hash below must both use reg_method.
anon_users = FILTER raw BY
    reg_method MATCHES 'ANON-.*'
    AND helix_id IS NULL
    AND last_sess_dt IS NOT NULL;

-- Assign each such user a new helix_id derived from its row key.
-- Output schema: (anon, helix_id:chararray).
-- NOTE(review): HelixHash is a UDF not defined in the visible script; its
-- return type and hashing scheme should be confirmed against its source.
new_anon_with_helix_id = FOREACH anon_users GENERATE
    reg_method AS anon,
    (chararray)HelixHash(reg_method) AS helix_id;
-- Shape the newly assigned ids for HBase: anon key, helix_id, and a map
-- produced by tupleToMap from 'HELIX-<helix_id>' and the $run_date parameter.
-- NOTE(review): tupleToMap is a UDF not defined in the visible script;
-- presumably it builds a single-entry map from (key, value) -- confirm.
-- NOTE(review): $run_date is substituted textually before parsing, so it
-- must expand to a valid Pig expression -- confirm how it is passed.
to_store = FOREACH new_anon_with_helix_id GENERATE
    anon,
    helix_id,
    tupleToMap(CONCAT('HELIX-', (chararray) helix_id), $run_date);

-- Write back to the source table. HBaseStorage takes the first field (anon)
-- as the row key; helix_id goes to alias:helix and the map to the `alias`
-- column family.
STORE to_store INTO 'hbase://user_info_helix'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('alias:helix alias:');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment