Skip to content

Instantly share code, notes, and snippets.

@Canx
Last active August 29, 2015 14:09
Show Gist options
  • Save Canx/f2bd40bd9effd8498690 to your computer and use it in GitHub Desktop.
Save Canx/f2bd40bd9effd8498690 to your computer and use it in GitHub Desktop.
scrapping de candidaturas
/**
 * Fetches the "about" JSON document of a Reddit user and returns it as text.
 *
 * @param {string} usuario - Reddit username to look up.
 * @return {string} The response body as returned by getContentText().
 */
function getRedditUserData(usuario) {
  // Encode the username so characters such as "/", "?" or "#" cannot change
  // the path or query of the request URL.
  var url = "http://www.reddit.com/user/" + encodeURIComponent(usuario) + "/about.json";
  Logger.log("URL:" + url);
  var response = UrlFetchApp.fetch(url);
  Logger.log("RESPONSE:" + response);
  return response.getContentText();
}
/**
 * Reads the comment karma out of a Reddit "about" JSON payload.
 *
 * @param {string} data - JSON text containing a top-level `data` object.
 * @return {*} The value stored at `data.comment_karma`.
 */
function getCommentKarma(data) {
  var commentKarma = JSON.parse(data).data.comment_karma;
  Logger.log("COMMENT KARMA:" + commentKarma);
  return commentKarma;
}
/**
 * Reads the link karma out of a Reddit "about" JSON payload.
 *
 * @param {string} data - JSON text containing a top-level `data` object.
 * @return {*} The value stored at `data.link_karma`.
 */
function getLinkKarma(data) {
  var linkKarma = JSON.parse(data).data.link_karma;
  Logger.log("LINK KARMA:" + linkKarma);
  return linkKarma;
}
/**
 * Reads the `created` field out of a Reddit "about" JSON payload.
 *
 * @param {string} data - JSON text containing a top-level `data` object.
 * @return {*} The value stored at `data.created`, returned unconverted.
 */
function getCreatedDate(data) {
  var createdAt = JSON.parse(data).data.created;
  Logger.log("CREATED:" + createdAt);
  return createdAt;
}
// ------------------------------------------
// SCRAPING
// ------------------------------------------
/**
 * Fetches one page of the /r/podemos comment listing, parses it as JSON and
 * walks the result with `traverse`, which invokes `process` on each entry.
 *
 * @param {?string} page - "start" for the first page; any other non-null
 *     value is sent as the `after` query parameter. Null/undefined is a no-op.
 * @param {function(string, *)} process - Callback handed through to `traverse`.
 */
function getCommentsPage(page, process) {
  if (page == null) {
    return;
  }
  var url = "http://es.reddit.com/r/podemos/comments/.json";
  if (page !== "start") {
    // Encode the cursor so it cannot inject extra query parameters.
    url += "?after=" + encodeURIComponent(page);
  }
  var response = UrlFetchApp.fetch(url);
  // Read the body explicitly (as getRedditUserData does) instead of depending
  // on the implicit string conversion of the response object.
  var json = JSON.parse(response.getContentText());
  traverse(json, process);
}
// Most recent "author" value seen by saveUsers; it is paired with the next
// non-null "author_flair_text". Declared explicitly so the assignment below
// does not create an implicit global.
var tempuser;

/**
 * Key/value callback used while walking a comment listing.
 *
 * - "author": remembers the value in `tempuser`.
 * - "author_flair_text": when non-null, records (tempuser, value) through
 *   addUserToSheet.
 * - "after": stores the value in the file-level `page` variable and, when it
 *   is non-null, persists it through setLastPage.
 * Any other key is ignored.
 *
 * NOTE(review): the author/flair pairing depends on "author" being visited
 * before "author_flair_text" for the same comment — confirm against the
 * key order of the traversed JSON.
 *
 * @param {string} key - Property name being visited.
 * @param {*} value - Value stored under that property.
 */
function saveUsers(key, value) {
  if (key === "author") {
    tempuser = value;
  } else if (key === "author_flair_text") {
    if (value != null) {
      addUserToSheet(tempuser, value);
    }
  } else if (key === "after") {
    page = value;
    if (value != null) {
      setLastPage(value);
    }
  }
}
// Pagination cursor: assigned by start() and by saveUsers() (on each "after"
// key), and tested by the loop condition in start().
var page;
// EVERYTHING STARTS HERE
/**
 * Entry point. Loads the stored cursor into the file-level `page` variable,
 * then keeps fetching comment pages (with saveUsers as the callback) until
 * `page` becomes null, pausing 1000 ms after each fetch.
 */
function start() {
  page = getLastPage();
  while (true) {
    if (page == null) {
      return;
    }
    Logger.log(page);
    getCommentsPage(page, saveUsers);
    Utilities.sleep(1000);
  }
}
/**
 * Reads the stored pagination cursor from the active sheet.
 * The value lives in the 1x1 range at row 1, column 4.
 *
 * @return {*} The cell value, which is also written to the log.
 */
function getLastPage() {
  var cursorCell = SpreadsheetApp.getActiveSheet().getRange(1, 4, 1, 1);
  var lastPage = cursorCell.getValue();
  Logger.log(lastPage);
  return lastPage;
}
/**
 * Stores the pagination cursor in the active sheet, in the 1x1 range at
 * row 1, column 4 (the same cell getLastPage reads).
 *
 * @param {*} page - Cursor value to persist.
 */
function setLastPage(page) {
  var activeSheet = SpreadsheetApp.getActiveSheet();
  var cursorCell = activeSheet.getRange(1, 4, 1, 1);
  cursorCell.setValue(page);
}
/**
 * Appends a (user, candidature) row below the last used row of the third
 * sheet (index 2) of the active spreadsheet.
 *
 * A failure while writing the values is logged and not rethrown, so one bad
 * row does not abort the caller.
 *
 * @param {*} user - Value written to column 1.
 * @param {*} candidature - Value written to column 2.
 */
function addUserToSheet(user, candidature) {
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheets()[2];
  var data = [[user, candidature]];
  // Only the last row is needed; the previously unused getLastColumn() lookup
  // was an extra spreadsheet call with no effect on the result.
  var nextRow = sheet.getLastRow() + 1;
  var range = sheet.getRange(nextRow, 1, data.length, data[0].length);
  try {
    range.setValues(data);
  } catch (e) {
    Logger.log("ERROR:" + e.toString());
  }
}
/**
 * Walks an object tree depth-first. For every enumerable key of `o` it calls
 * `func(key, value)` and then, if the value is a non-null object (arrays
 * included), descends into it.
 *
 * @param {*} o - Root value to walk.
 * @param {function(string, *)} func - Invoked with each key and its value,
 *     using the caller's `this` for the entries of `o`.
 */
function traverse(o, func) {
  var key;
  for (key in o) {
    func.call(this, key, o[key]);
    // Re-read after the callback so a value it replaced is what gets walked.
    var child = o[key];
    if (child === null || typeof child != "object") {
      continue;
    }
    // Step one level down in the object tree.
    traverse(child, func);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment