Created
May 24, 2015 19:27
-
-
Save f-ewald/8dbd647e2f087d0445b1 to your computer and use it in GitHub Desktop.
3.2, 3.3, 3.4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Average number of 'sib:like' triples per user.
 *
 * Parameters (supply with -param or -param_file):
 *   $input_file  = input file, e.g. '/home/cloudera/Downloads/input/sibdataset200.nt'
 *   $output_file = location the result is stored to
 */
REGISTER RDFStorage.jar;

-- Each record is loaded as three fields (s, p, o); presumably the subject,
-- predicate and object of an RDF triple -- confirm against RDFStorage.
indata = LOAD '$input_file' USING RDFStorage() AS (s, p, o);
DESCRIBE indata;

-- Keep only the records whose p field is 'sib:like'.
likes = FILTER indata BY p == 'sib:like';

-- One group per distinct s, then the number of like records in each group.
user_likes = GROUP likes BY s;
user_likes_count = FOREACH user_likes GENERATE group AS user_id, COUNT(likes) AS likes;

-- Put every per-user count into a single group so AVG sees them all.
-- Only values of s that survived the FILTER above contribute to the average.
user_avg_like_grp = GROUP user_likes_count ALL;
user_avg_like = FOREACH user_avg_like_grp GENERATE group, AVG(user_likes_count.likes) AS avg;

-- The result is both printed and stored.
DUMP user_avg_like;
STORE user_avg_like INTO '$output_file';
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Users that appear as the o field of at least k 'foaf:knows' records.
--
-- Required parameters (supply with -param or -param_file):
--   input_file   input file read through RDFStorage
--   output_file  location the result is stored to
--   k            minimum number of 'foaf:knows' records (inclusive)
REGISTER RDFStorage.jar;

-- Each record is loaded as three fields (s, p, o); presumably the subject,
-- predicate and object of an RDF triple -- confirm against RDFStorage.
indata = LOAD '$input_file' USING RDFStorage() AS (s, p, o);
DESCRIBE indata;

-- Keep only the records whose p field is 'foaf:knows'.
knows = FILTER indata BY p == 'foaf:knows';

-- Group by o, i.e. by the field on the receiving end of the relation,
-- and count the records in each group.
knows_grp = GROUP knows BY o;
popularity = FOREACH knows_grp GENERATE group AS user_id, COUNT(knows) AS u_popularity;

-- Keep the users whose count reaches the threshold.
pop_filter = FILTER popularity BY u_popularity >= $k;
STORE pop_filter INTO '$output_file';
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Number of posts per user.
--
-- Required parameters (supply with -param or -param_file):
--   input_file   input file read through RDFStorage
--   output_file  location the result is stored to
REGISTER RDFStorage.jar;

-- Each record is loaded as three fields (s, p, o); presumably the subject,
-- predicate and object of an RDF triple -- confirm against RDFStorage.
indata = LOAD '$input_file' USING RDFStorage() AS (s, p, o);

-- Records with p == 'sioc:creator_of', and records with o == 'sib:Post'.
creator = FILTER indata BY p == 'sioc:creator_of';
posts = FILTER indata BY o == 'sib:Post';

-- Match each creator_of record with the 'sib:Post' record for the same item,
-- which drops creator_of records whose o has no 'sib:Post' record.
user_entries = JOIN creator BY o, posts BY s;

-- Group the joined records by the creator side's s field and count them.
-- After GROUP the only fields are `group` and the bag `user_entries`, so the
-- count is taken here; the relation cannot be grouped by creator::s again.
user_posts = GROUP user_entries BY creator::s;
res = FOREACH user_posts GENERATE group AS user_id, COUNT(user_entries) AS amount;

-- The result is both printed and stored.
DUMP res;
STORE res INTO '$output_file';
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment