Last active
January 1, 2016 17:29
-
-
Save rachtsingh/8177874 to your computer and use it in GitHub Desktop.
Script that parses the Facebook group page for the Harvard Class of 2018, extracts the "20 facts" posts, and dumps them to a JSON file. To use it, set up a JavaScript Facebook app and run these scripts after authentication.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Walks the feed of Facebook object 601401026562908 page by page, collects
 * every post whose message matches the "4" list-marker pattern, and then
 * calls getPictures().
 *
 * Matching posts are appended to the shared global `posts` array, which is
 * what getPictures() and saveOutput() read afterwards. The array is created
 * here if nothing else has defined it yet.
 *
 * Crawling stops when a page holds fewer than two posts (treated as empty,
 * and not scanned), when the response carries no `data` array (e.g. an error
 * response), or when there is no `paging.next` link to follow.
 */
function executeScript() {
    // "4" followed by one of : . ) - and then any character other than 1-9.
    // (the 4 is there because of that 2 truths and a lie thing)
    // The original character class also contained a stray "|", which made
    // "4|" count as a list marker; it is dropped here.
    var factMarker = /4[:.)\-][^1-9]/;

    // getPictures() and saveOutput() read the global `posts`, so it must
    // exist before the first push.
    if (typeof posts === "undefined") {
        window.posts = [];
    }

    // Hands the collected posts over to the picture-fetching step.
    function finish() {
        console.log(posts);
        getPictures(); // get profile pictures for people
    }

    function findmatches(response) {
        console.log("parsing page"); // just for kicks

        var page = response && response.data;
        if (!Array.isArray(page)) {
            // No data array at all: report the error if there is one and stop
            // crawling instead of throwing on `page.length`.
            if (response && response.error) {
                console.log("feed request failed", response.error);
            }
            finish();
            return;
        }
        if (page.length < 2) { // empty response, sort of
            finish();
            return;
        }

        page.forEach(function (post) {
            if (post.message && factMarker.test(post.message)) {
                posts.push(post);
            }
        });

        // The last page has no `next` link; following it blindly would pass
        // `undefined` to FB.api (or throw when `paging` itself is missing).
        var nextPage = response.paging && response.paging.next;
        if (nextPage) {
            FB.api(nextPage, findmatches);
        } else {
            finish();
        }
    }

    FB.api('/601401026562908/feed/', findmatches);
}
/**
 * Looks up a square profile picture for the author of every entry in the
 * global `posts` array, trims each post down, and calls saveOutput() once
 * every lookup has answered.
 *
 * For each post the picture URL is stored on `post.picture` and the bulky
 * fields (likes, comments, actions, application, id, privacy, type,
 * updated_time) are deleted in place. If a lookup returns no picture, the
 * post is still trimmed but gets no `picture` property.
 *
 * When `posts` is empty, saveOutput() is called immediately.
 */
function getPictures() {
    // Local countdown of outstanding FB.api callbacks.
    var remaining = posts.length;

    function finish() {
        console.log("Done getting pictures!");
        saveOutput();
    }

    // With nothing to look up no callback would ever fire, so finish now.
    if (remaining === 0) {
        finish();
        return;
    }

    posts.forEach(function (post) {
        FB.api("/" + post.from.id + "/?fields=picture.type(square)", function (response) {
            // A failed lookup has no `picture`; skip the assignment rather than
            // throw, so the countdown below still runs and the export finishes.
            var pictureData = response && response.picture && response.picture.data;
            if (pictureData) {
                post.picture = pictureData.url;
            }

            delete post.likes; // gotta keep the data size at least a little reasonable
            delete post.comments;
            delete post.actions;
            delete post.application;
            delete post.id;
            delete post.privacy;
            delete post.type;
            delete post.updated_time;

            remaining -= 1;
            if (remaining === 0) {
                finish();
            }
        });
    });
}
/**
 * Serialises the global `posts` array to JSON, wraps it in a Blob, and passes
 * it to saveAs() under the file name "harvard.json".
 */
function saveOutput() {
    var serialized = JSON.stringify(posts);
    var blobOptions = {type: "text/plain;charset=utf-8"};
    var payload = new Blob([serialized], blobOptions);
    saveAs(payload, "harvard.json");
}
// Load the SDK asynchronously: insert a <script id="facebook-jssdk"> tag
// before the first script on the page, unless one with that id already exists.
(function (doc) {
    var sdkId = 'facebook-jssdk';
    var firstScript = doc.getElementsByTagName('script')[0];

    if (doc.getElementById(sdkId)) {
        return;
    }

    var sdkTag = doc.createElement('script');
    sdkTag.id = sdkId;
    sdkTag.async = true;
    sdkTag.src = "//connect.facebook.net/en_US/all.js";

    firstScript.parentNode.insertBefore(sdkTag, firstScript);
}(document));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment