Created
October 27, 2016 16:43
-
-
Save MattSandy/7b45b29c56b0ec81eb1b3df32a1e0cab to your computer and use it in GitHub Desktop.
Reddit Front Page Monitor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var http = require('http');
var https = require('https');
var fs = require('fs');

// IDs of posts already written to posts.csv; scrape_hot() consults this so a
// post is logged at most once per run.
var post_array = [];
// Usernames already handed to scrape_user(); each account is looked up once.
var user_array = [];

// Start every run with fresh CSV files that contain only the header row.
fs.writeFile('posts.csv', 'Author,ID,Post Date,Comments,Score,Stickied,Pull,Subreddit\n', function (err) {
    if (err) {
        console.log('Could not reset posts.csv: ', err);
        return;
    }
    console.log('Cleared posts.csv');
});
fs.writeFile('users.csv', 'Author,Author Date\n', function (err) {
    if (err) {
        console.log('Could not reset users.csv: ', err);
        return;
    }
    console.log('Cleared users.csv');
});

// Subreddits to monitor; add more names here to watch several at once.
var subreddits = ["all"];

for (var i = 0; i < subreddits.length; i++) {
    // setTimeout forwards its extra arguments to the callback. Receiving the
    // name as a parameter gives every timer its own value; reading a shared
    // `var` from the enclosing loop would make all timers see the last name.
    setTimeout(function (name) { scrape_hot("", 1, name); }, 1000, subreddits[i]);
}
/**
 * Fetch one page of a subreddit's JSON listing, append every post not seen
 * before to posts.csv, and queue a user lookup for each new post's author.
 *
 * After a page is processed the function either requests the next page or,
 * once the page limit or the end of the listing is reached, schedules a fresh
 * pass from page 1 twenty minutes later. Failed requests and unparsable
 * responses are retried for the same page after a short pause.
 *
 * @param {string} after     Pagination cursor sent as the `after` query value ("" for the first page).
 * @param {number} page      1-based number of the page being fetched.
 * @param {string} subreddit Subreddit name, without the "/r/" prefix.
 */
function scrape_hot(after, page, subreddit) {
    var MAX_PAGES = 40;
    var CYCLE_DELAY_MS = 1000 * 60 * 20;
    // Pause before retrying so a failing endpoint is not hit in a tight loop.
    var RETRY_DELAY_MS = 5 * 1000;

    function retry_same_page() {
        setTimeout(function () { scrape_hot(after, page, subreddit); }, RETRY_DELAY_MS);
    }

    var url = "https://www.reddit.com/r/" + subreddit + "/.json?after=" + after;

    https.get(url, function (res) {
        var body = '';
        res.on('data', function (chunk) {
            body += chunk;
        });
        res.on('end', function () {
            try {
                var response = JSON.parse(body);
                var children = response.data.children;

                for (var i = 0; i < children.length; i++) {
                    var post = children[i].data;

                    // Skip posts that were already logged during this run.
                    if (post_array.indexOf(post.id) !== -1) {
                        continue;
                    }
                    post_array.push(post.id);

                    // One CSV row, in the column order of the posts.csv header.
                    var line = post.author + ',' + post.id + ',' +
                        format_date(post.created) + ',' + post.num_comments + ',' +
                        post.score + ',' + post.stickied + ',' +
                        'hot,' + post.subreddit + "\n";
                    console.log(line);

                    // Record the author's account details (deduplicated inside scrape_user).
                    scrape_user(post.author);

                    fs.appendFile('posts.csv', line, function (err) {
                        if (err) {
                            console.log("Could not append to posts.csv: ", err);
                        }
                    });
                }

                var next = response.data.after;
                if (page < MAX_PAGES && next) {
                    scrape_hot(next, page + 1, subreddit);
                } else {
                    // Page limit reached, or no cursor for a following page
                    // (a null cursor would otherwise be sent as "after=null"):
                    // wait, then start over from the first page.
                    setTimeout(function () { scrape_hot("", 1, subreddit); }, CYCLE_DELAY_MS);
                }
            } catch (err) {
                // Body was not the expected JSON shape; try this page again later.
                console.log(err);
                retry_same_page();
            }
        });
    }).on('error', function (e) {
        console.log("Got an error: ", e);
        retry_same_page();
    });
}
/**
 * Look up a user's account once and append "author,creation date" to
 * users.csv. Users already present in user_array are skipped.
 *
 * A failed request or an unusable response is retried a limited number of
 * times with a pause in between; after the last attempt the user is given up
 * on and the failure is logged.
 *
 * @param {string} user Username as it appears in a post's `author` field.
 */
function scrape_user(user) {
    var MAX_ATTEMPTS = 3;
    var RETRY_DELAY_MS = 5 * 1000;

    // Retries go through this helper rather than through scrape_user itself:
    // scrape_user would find the name already in user_array and return
    // without fetching anything.
    function fetch_about(attempt) {
        function retry(reason) {
            if (attempt >= MAX_ATTEMPTS) {
                console.log("Giving up on user " + user + " after " + attempt + " attempts: ", reason);
                return;
            }
            setTimeout(function () { fetch_about(attempt + 1); }, RETRY_DELAY_MS);
        }

        var url = "https://www.reddit.com/user/" + user + "/about.json";

        https.get(url, function (res) {
            var body = '';
            res.on('data', function (chunk) {
                body += chunk;
            });
            res.on('end', function () {
                var line;
                try {
                    var response = JSON.parse(body);
                    line = user + "," + format_date(response.data.created) + "\n";
                } catch (err) {
                    // Not JSON, or no `data.created` in the response.
                    retry(err);
                    return;
                }
                fs.appendFile('users.csv', line, function (err) {
                    if (err) {
                        console.log("Could not append to users.csv: ", err);
                    }
                });
            });
        }).on('error', function (e) {
            console.log("Got an error: ", e);
            retry(e);
        });
    }

    if (user_array.indexOf(user) !== -1) {
        return;
    }
    user_array.push(user);
    fetch_about(1);
}
/**
 * Convert a Unix timestamp in seconds to a "YYYY-MM-DD" string, using the
 * local time zone of the running process.
 *
 * @param {number} date Seconds since the Unix epoch.
 * @returns {string} Year, zero-padded month and zero-padded day joined by "-".
 */
function format_date(date) {
    // Left-pad single-character values with "0"; longer values pass through.
    function two_digits(value) {
        var text = value.toString();
        return text.length < 2 ? "0" + text : text;
    }

    var when = new Date(date * 1000);
    var year = when.getFullYear().toString();
    var month = two_digits(when.getMonth() + 1); // getMonth() counts from 0
    var day = two_digits(when.getDate());

    return [year, month, day].join("-");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment