Skip to content

Instantly share code, notes, and snippets.

@f-ewald
Created May 24, 2015 19:27
Show Gist options
  • Save f-ewald/8dbd647e2f087d0445b1 to your computer and use it in GitHub Desktop.
Save f-ewald/8dbd647e2f087d0445b1 to your computer and use it in GitHub Desktop.
3.2, 3.3, 3.4
/*
 * Average number of likes per user (presumably exercise 3.2 of the set
 * named in the gist title, to be confirmed).
 *
 * Parameters:
 *   input_file  - N-Triples file to read, e.g.
 *                 /home/cloudera/Downloads/input/sibdataset200.nt
 *   output_file - location the result is stored to
 */
REGISTER RDFStorage.jar;

indata = LOAD '$input_file' USING RDFStorage() AS (s, p, o);
DESCRIBE indata;

-- Keep only the sib:like triples and bucket them by subject.
like_triples = FILTER indata BY (p == 'sib:like');
likes_by_user = GROUP like_triples BY s;

-- One row per subject with the number of like triples it owns.
likes_per_user = FOREACH likes_by_user GENERATE
    group AS user_id,
    COUNT(like_triples) AS like_count;

-- Collapse all rows into a single group so the per-user counts can be averaged.
all_users = GROUP likes_per_user ALL;
user_avg_like = FOREACH all_users GENERATE
    group,
    AVG(likes_per_user.like_count) AS avg;

DUMP user_avg_like;
STORE user_avg_like INTO '$output_file';
/*
 * Subjects that are the object of at least k foaf:knows triples.
 *
 * Parameters:
 *   input_file  - N-Triples file to read
 *   output_file - location the result is stored to
 *   k           - minimum number of incoming foaf:knows triples
 */
REGISTER RDFStorage.jar;

indata = LOAD '$input_file' USING RDFStorage() AS (s, p, o);
DESCRIBE indata;

-- Keep the foaf:knows triples and bucket them by the object, i.e. the known side.
knows_triples = FILTER indata BY (p == 'foaf:knows');
known_by = GROUP knows_triples BY o;

-- One row per object with the number of foaf:knows triples pointing at it.
popularity_per_user = FOREACH known_by GENERATE
    group AS user_id,
    COUNT(knows_triples) AS u_popularity;

-- Drop everything below the threshold given by the k parameter.
pop_filter = FILTER popularity_per_user BY u_popularity >= $k;

STORE pop_filter INTO '$output_file';
/*
 * Number of posts created per user.
 *
 * Parameters:
 *   input_file  - N-Triples file to read
 *   output_file - location the (user, post count) tuples are stored to
 */
REGISTER RDFStorage.jar ;
indata = LOAD '$input_file' USING RDFStorage() AS (s,p,o) ;

-- Triples with predicate sioc:creator_of. The subject is treated as the
-- creating user and the object as the created item.
creator = FILTER indata BY p == 'sioc:creator_of';

-- Triples whose object is sib:Post. The subject is treated as the post
-- (presumably rdf:type statements, to be confirmed against the data).
posts = FILTER indata BY o == 'sib:Post';

-- Keep only the created items that are posts.
user_entries = JOIN creator BY o, posts BY s;

-- One group per creating user. After this GROUP the relation has exactly two
-- top-level fields, group and the user_entries bag.
user_posts = GROUP user_entries BY creator::s;

-- Count the joined rows per user straight from the group above. An earlier
-- revision grouped user_posts a second time BY creator::s, which cannot be
-- resolved because that field only exists inside the user_entries bag.
res = FOREACH user_posts GENERATE group AS user, COUNT(user_entries) AS post_count;

DUMP res;
STORE res INTO '$output_file';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment