Created
September 12, 2016 20:39
-
-
Save nullberri/9f5094518970e5c2d279f6a63ca67064 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var async = require('asyncawait/async'); | |
var await = require('asyncawait/await'); | |
var fs = require('fs-promise'); | |
var CronJob = require('cron').CronJob; | |
var request = require('request'); | |
var urlParser = require("./urlParse.js").CreateUrlParser(); | |
var Redditor = require("mongoose").model("redditor"); | |
var CONFIG = require('../../config/config.js').reddit | |
const batch = require('mongoose').model('batch'); | |
const name = "reddit" | |
/**
 * Downloads `url` and streams the response body into the file `dest`.
 *
 * The promise settles only after the write stream has emitted 'finish', so a
 * caller that stats or unlinks the file afterwards sees the complete file.
 * The body is streamed straight to disk rather than buffered for a callback.
 *
 * @param {string} url  - Address to fetch.
 * @param {string} dest - Path of the file to write.
 * @returns {Promise} Resolves with no value once the file is fully written;
 *                    rejects with the request error or the write-stream error.
 */
var downloadFile = function (url, dest) {
    return new Promise(function (resolve, reject) {
        var target = fs.createWriteStream(dest);

        // 'finish' fires after all data has been handed to the file system.
        target.on('finish', function () { resolve(); });
        // Previously write failures were only logged; surface them to the caller.
        target.on('error', reject);

        request.get(url)
            .on('error', function (err) {
                reject(err);
                // Close the half-written file; the later 'finish' is a no-op
                // because the promise is already rejected.
                target.end();
            })
            .pipe(target);
    });
};
var downloadRedditor = async(function (redditor) { | |
var path = CONFIG.downloadPath + redditor.username + "/"; | |
//check if downloads exists | |
if (!await(fs.exists(CONFIG.downloadPath))) { | |
await(fs.mkdir(CONFIG.downloadPath)); | |
} | |
//create folder for the username | |
if (!await(fs.exists(path))) { | |
await(fs.mkdir(path)); | |
} | |
var links = await(redditor.links()); | |
links.forEach(function (link) { | |
try { | |
var parsedUrls = await(urlParser.parse(link)); | |
//todo fix this | |
path = CONFIG.downloadPath + redditor.username + "/"; | |
//create folder for album | |
if (parsedUrls.album) { | |
path += parsedUrls.album + "/"; | |
if (!await(fs.exists(path))) { | |
await(fs.mkdir(path)); | |
} | |
} | |
parsedUrls.images.forEach(function (url) { | |
//extract filename | |
if(!url){return;} | |
console.log(url); | |
var filename = url.split("/").pop(); | |
await(downloadFile(url, path + filename)); | |
//is file a missing file from imgur | |
var stats = await(fs.stat(path + filename)); | |
if (stats['size'] < 1000) { | |
//console.log("file was a missing image: " + path + filename); | |
await(fs.unlink(path + filename)); | |
} | |
//console.log(url); | |
}); | |
} | |
catch (err) { | |
console.log(err.stack) | |
} | |
}); | |
}); | |
/////////////////// | |
//BEGIN API EXPORTS | |
/////////////////// | |
exports.processRedditors = async(function () { | |
try { | |
var redditors = await(Redditor.find({})); | |
redditors.forEach(function (redditor) { | |
console.log(redditor.username); | |
await(downloadRedditor(redditor)); | |
await(redditor.save()); | |
}); | |
await(batch.setTimestamp(name, Date.now())); | |
} catch (err) { | |
console.log(err.stack); | |
} | |
}); | |
//////////////// | |
//initialization | |
//////////////// | |
// Only when global.PROD is set: register a cron job with the pattern
// '15 * * * *' whose tick starts a full processRedditors() run.
// NOTE(review): the comparison is deliberately left loose (`== true`).
if (global.PROD == true) {
    var runScheduledDownload = function () {
        exports.processRedditors();
    };
    new CronJob('15 * * * *', runScheduledDownload, null, true);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict' | |
var async = require('asyncawait/async'); | |
var await = require('asyncawait/await'); | |
var mongoose = require('mongoose'); | |
var request = require('request-promise'); | |
// Schema of one tracked reddit user.
var redditor = new mongoose.Schema({
    // Account name; the `url` virtual is built from it.
    username: String,
    // Messages appended by links(); emptied at the start of each links() call.
    error: String,
    // Time of the latest links() call, from Date#getTime().
    lastUpdate: Number,
    // Timestamp of the newest post already handed out by links().
    lastPost: Number
});
/**
 * Puts the tracking fields back to their initial values (timestamps 0,
 * empty error text). The document is not saved here.
 *
 * @returns {Promise} Resolves with no value.
 */
redditor.methods.reset = async(function () {
    // Same assignments, in the same order, as three separate statements.
    Object.assign(this, {
        lastUpdate: 0,
        lastPost: 0,
        error: ""
    });
});
redditor.methods.links = async(function () { | |
var links = []; | |
this.lastUpdate = new Date().getTime(); | |
this.error = ""; | |
try { | |
var body = await(request(this.url+ ".json?limit=100", { gzip: true })); | |
var json = JSON.parse(body); | |
if (json.data && json.data.children) { | |
for (var i = json.data.children.length-1; i >= 0 ; i--) { | |
let entry = json.data.children[i].data; | |
let timestamp = entry.created_utc * 1000; | |
if (timestamp > this.lastPost) { | |
links.push(entry.url); | |
this.lastPost = timestamp; | |
} | |
} | |
} | |
//todo make this accept multiple errors; | |
if (Math.abs(this.lastUpdate - this.lastPost) > (86400000 * 120)) { | |
this.error += "More than 4 months since last post. "; | |
} | |
} catch (err) { | |
if (err.statusCode == 404) { | |
this.error += "User not found. "; | |
} | |
} | |
return links; | |
}); | |
// Virtual `url`: address of the user's "submitted" listing on reddit.
redditor.virtual('url').get(function () {
    var prefix = "http://www.reddit.com/user/";
    var suffix = "/submitted/";
    return prefix + this.username + suffix;
});

// Register the schema as the 'redditor' model and expose it.
module.exports = mongoose.model('redditor', redditor);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var http = require('http'); | |
var request = require('request'); | |
var async = require('asyncawait/async'); | |
var await = require('asyncawait/await'); | |
var Url = require("url"); | |
var CONFIG = require('../../config/config.js').reddit | |
var JPG = ".jpg"; | |
//move this back to an environment file | |
/**
 * Performs a GET through `request` and parses the response body as JSON.
 *
 * @param {string|Object} options - Passed unchanged to request.get().
 * @returns {Promise<*>} Resolves with the parsed JSON value. Rejects with the
 *                       request error, or with the JSON.parse error (which is
 *                       logged together with the raw body first).
 */
function getJson(options) {
    return new Promise((resolve, reject) => {
        var onResponse = (err, res, body) => {
            if (err) {
                reject(err);
                return;
            }
            var parsed;
            try {
                parsed = JSON.parse(body);
            }
            catch (e) {
                console.log(e)
                console.log(body)
                reject(e)
                return;
            }
            resolve(parsed);
        };
        request.get(options, onResponse);
    });
}
/**
 * Maps a link to the image URLs behind it by dispatching on the link's
 * hostname. Can be called with or without `new`.
 *
 * Handlers for imgur, vidble and gfycat hosts are registered up front.
 */
function UrlParser() {
    if (!(this instanceof UrlParser)) {
        return new UrlParser();
    }
    // hostname -> handler(parsedUrl)
    this.handlers = {};
    this.registerHandler("m.imgur.com", imgur);
    this.registerHandler("imgur.com", imgur);
    this.registerHandler("vidble.com", vidble);
    this.registerHandler("www.vidble.com", vidble);
    this.registerHandler("gfycat.com", gfycat);
}

/**
 * Resolves `url` to the images it points at.
 *
 * @param {string} url - Link to inspect.
 * @returns {Promise<{images: string[], album: (string|undefined)}>}
 *          For links ending in ".jpg" or ".png" the link itself; otherwise
 *          whatever the handler registered for the hostname produces.
 *          Rejects with an Error ("No handler for <url>") when the link cannot
 *          be parsed or no handler is registered for its hostname.
 */
UrlParser.prototype.parse = async(function (url) {
    //trivial case: the link already is an image file.
    // The dot is escaped so that an id merely ending in "jpg" does not match.
    if (/\.(jpg|png)$/.test(url)) {
        return { images: [url] };
    }

    var parsedUrl;
    try {
        parsedUrl = Url.parse(url);
    }
    catch (err) {
        throw new Error("No handler for " + url);
    }

    // Own-property lookup keeps hostnames such as "constructor" from
    // resolving to members inherited from Object.prototype.
    var host = parsedUrl.hostname;
    var handler = Object.prototype.hasOwnProperty.call(this.handlers, host)
        ? this.handlers[host]
        : undefined;
    if (typeof handler !== "function") {
        // A real Error, so that callers logging err.stack get useful output.
        throw new Error("No handler for " + url);
    }

    // Outside any try/catch: a failure inside the handler keeps its own error
    // instead of being reported as a missing handler.
    return handler(parsedUrl);
});

/**
 * Registers (or replaces) the handler used for links on `host`.
 *
 * @param {string} host - Exact hostname, e.g. "imgur.com".
 * @param {function(Object): *} handler - Receives the result of Url.parse().
 */
UrlParser.prototype.registerHandler = function (host, handler) {
    this.handlers[host] = handler;
};

/**
 * Removes the handler for `host`; later links on that host are rejected.
 *
 * @param {string} host - Hostname given to registerHandler().
 */
UrlParser.prototype.unregisterHandler = function (host) {
    delete this.handlers[host];
};
function imgur (parsedUrl) { | |
var imageHostName = "http://i.imgur.com"; | |
var imgurAlbumApi = "https://api.imgur.com/3/album/"; | |
var albumIdentifier = "/a/"; | |
return new Promise(function (resolve, reject) { | |
var path = parsedUrl.path; | |
if (path.indexOf(albumIdentifier) === 0) { | |
var lastSlash = path.lastIndexOf("/")+1; | |
var id = path.substring(lastSlash, path.length); | |
var options = { url: imgurAlbumApi + id + "/images", headers: CONFIG.imgurHeader } | |
var json = await(getJson(options)); | |
var images = []; | |
for (var i = 0; i < json.data.length; i++) { | |
images.push(json.data[i].link); | |
} | |
return resolve({ images: images, album: id }); | |
} | |
else { | |
return resolve({ images: [imageHostName + path + JPG] }); | |
} | |
}); | |
}; | |
function vidble (parsedUrl) { | |
var showIdentifier = "/show/"; | |
var albumIdentifier = "/album/"; | |
var vidibleHost = "http://www.vidble.com/"; | |
return new Promise(function (resolve, reject) { | |
var path = parsedUrl.path; | |
if (path.indexOf(showIdentifier) === 0) { | |
var lastSlash = path.lastIndexOf("/") + 1; | |
var id = path.substring(lastSlash, path.length); | |
return resolve({ images: [vidibleHost + id + JPG] }) | |
} | |
else if (path.indexOf(albumIdentifier) === 0) //C8CZwA3I?json=1 | |
{ | |
var lastSlash = path.lastIndexOf("/") + 1; | |
var id = path.substring(lastSlash, path.length); | |
var json = await(getJson(parsedUrl.href + "?json=1")); | |
var images = []; | |
for (var i = 0; i < json.pics.length; i++) { | |
images.push("http:" + json.pics[i]); | |
} | |
return resolve({ images: images, album: id }); | |
} | |
return reject(parsedUrl); | |
}); | |
}; | |
function gfycat (parsedUrl) { | |
return new Promise((resolve,reject)=>{ | |
try | |
{ | |
var json = await(getJson("https://gfycat.com/cajax/get"+parsedUrl.path)); | |
return resolve({images: [json.gfyItem.mp4Url]}) | |
} | |
catch(err) | |
{ | |
return reject(err); | |
} | |
}) | |
} | |
exports.CreateUrlParser = UrlParser; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment