Skip to content

Instantly share code, notes, and snippets.

@nullberri
Created September 12, 2016 20:39
Show Gist options
  • Save nullberri/9f5094518970e5c2d279f6a63ca67064 to your computer and use it in GitHub Desktop.
Save nullberri/9f5094518970e5c2d279f6a63ca67064 to your computer and use it in GitHub Desktop.
var async = require('asyncawait/async');
var await = require('asyncawait/await');
var fs = require('fs-promise');
var CronJob = require('cron').CronJob;
var request = require('request');
var urlParser = require("./urlParse.js").CreateUrlParser();
var Redditor = require("mongoose").model("redditor");
var CONFIG = require('../../config/config.js').reddit
const batch = require('mongoose').model('batch');
const name = "reddit"
/**
 * Downloads `url` and streams the response body into the file at `dest`.
 *
 * The returned promise settles only after the write stream has flushed
 * ('finish'), so a caller may stat or unlink the file as soon as it has
 * awaited the result.
 *
 * @param {string} url - Address of the resource to fetch.
 * @param {string} dest - Path of the file to write.
 * @returns {Promise<undefined>} Resolves once the file is completely written;
 *     rejects with the underlying error if the request or the write fails.
 */
var downloadFile = function (url, dest) {
    return new Promise(function (resolve, reject) {
        var output = fs.createWriteStream(dest);
        output.on('error', reject);
        output.on('finish', function () { resolve(); });
        request.get(url)
            .on('error', function (err) {
                reject(err);
                // pipe() does not end the destination when the source fails,
                // so close it here to release the file handle.
                output.end();
            })
            .pipe(output);
    });
};
var downloadRedditor = async(function (redditor) {
var path = CONFIG.downloadPath + redditor.username + "/";
//check if downloads exists
if (!await(fs.exists(CONFIG.downloadPath))) {
await(fs.mkdir(CONFIG.downloadPath));
}
//create folder for the username
if (!await(fs.exists(path))) {
await(fs.mkdir(path));
}
var links = await(redditor.links());
links.forEach(function (link) {
try {
var parsedUrls = await(urlParser.parse(link));
//todo fix this
path = CONFIG.downloadPath + redditor.username + "/";
//create folder for album
if (parsedUrls.album) {
path += parsedUrls.album + "/";
if (!await(fs.exists(path))) {
await(fs.mkdir(path));
}
}
parsedUrls.images.forEach(function (url) {
//extract filename
if(!url){return;}
console.log(url);
var filename = url.split("/").pop();
await(downloadFile(url, path + filename));
//is file a missing file from imgur
var stats = await(fs.stat(path + filename));
if (stats['size'] < 1000) {
//console.log("file was a missing image: " + path + filename);
await(fs.unlink(path + filename));
}
//console.log(url);
});
}
catch (err) {
console.log(err.stack)
}
});
});
///////////////////
//BEGIN API EXPORTS
///////////////////
exports.processRedditors = async(function () {
try {
var redditors = await(Redditor.find({}));
redditors.forEach(function (redditor) {
console.log(redditor.username);
await(downloadRedditor(redditor));
await(redditor.save());
});
await(batch.setTimestamp(name, Date.now()));
} catch (err) {
console.log(err.stack);
}
});
////////////////
//initialization
////////////////
// Schedule the download pass only when global.PROD is (loosely) true.
// NOTE(review): '15 * * * *' presumably means minute 15 of every hour and the
// trailing `true` presumably starts the job immediately - confirm against the
// cron package documentation.
if (global.PROD == true) {
    var hourlySchedule = '15 * * * *';
    var runDownloadPass = function () {
        exports.processRedditors();
    };
    new CronJob(hourlySchedule, runDownloadPass, null, true);
}
'use strict'
var async = require('asyncawait/async');
var await = require('asyncawait/await');
var mongoose = require('mongoose');
var request = require('request-promise');
// Field layout of a tracked reddit user.
var redditorFields = {
    // reddit account name; used to build the profile URL
    username: String,
    // accumulated error text from the most recent links() call
    error: String,
    // epoch milliseconds of the most recent links() call
    lastUpdate: Number,
    // epoch milliseconds of the newest post already collected
    lastPost: Number,
};
var redditor = mongoose.Schema(redditorFields);
/**
 * Clears the tracking state of this document: both timestamps go back to 0
 * and the error text is emptied. The document is not saved here.
 */
redditor.methods.reset = async(function () {
    var doc = this;
    doc.lastUpdate = doc.lastPost = 0;
    doc.error = "";
});
redditor.methods.links = async(function () {
var links = [];
this.lastUpdate = new Date().getTime();
this.error = "";
try {
var body = await(request(this.url+ ".json?limit=100", { gzip: true }));
var json = JSON.parse(body);
if (json.data && json.data.children) {
for (var i = json.data.children.length-1; i >= 0 ; i--) {
let entry = json.data.children[i].data;
let timestamp = entry.created_utc * 1000;
if (timestamp > this.lastPost) {
links.push(entry.url);
this.lastPost = timestamp;
}
}
}
//todo make this accept multiple errors;
if (Math.abs(this.lastUpdate - this.lastPost) > (86400000 * 120)) {
this.error += "More than 4 months since last post. ";
}
} catch (err) {
if (err.statusCode == 404) {
this.error += "User not found. ";
}
}
return links;
});
// Virtual `url`: address of the user's "submitted" listing, derived from username.
redditor.virtual('url').get(function () {
    var profileRoot = "http://www.reddit.com/user/";
    var listingSuffix = "/submitted/";
    return profileRoot + this.username + listingSuffix;
});
// Register the schema with mongoose under the model name 'redditor' and export the resulting model.
module.exports = mongoose.model('redditor', redditor);
var http = require('http');
var request = require('request');
var async = require('asyncawait/async');
var await = require('asyncawait/await');
var Url = require("url");
var CONFIG = require('../../config/config.js').reddit
var JPG = ".jpg";
//move this back to an environment file
/**
 * Performs a GET via `request` and parses the response body as JSON.
 *
 * @param {string|Object} options - Passed straight through to request.get.
 * @returns {Promise<*>} Resolves with the parsed JSON value; rejects with the
 *     request error, or with the parse error (after logging it and the raw
 *     body) when the body is not valid JSON.
 */
function getJson (options) {
    return new Promise(function (resolve, reject) {
        request.get(options, function (err, res, body) {
            if (err) {
                reject(err);
                return;
            }
            var parsed;
            try {
                parsed = JSON.parse(body);
            }
            catch (parseError) {
                console.log(parseError)
                console.log(body)
                reject(parseError);
                return;
            }
            resolve(parsed);
        });
    });
}
/**
 * Creates a parser with a host -> handler table pre-populated for imgur,
 * vidble and gfycat. May be called with or without `new`.
 */
function UrlParser () {
    var calledWithNew = this instanceof UrlParser;
    if (!calledWithNew) {
        return new UrlParser();
    }
    this.handlers = {};
    var self = this;
    // [hostname, handler] pairs, registered in this order
    var defaults = [
        ["m.imgur.com", imgur],
        ["imgur.com", imgur],
        ["vidble.com", vidble],
        ["www.vidble.com", vidble],
        ["gfycat.com", gfycat]
    ];
    defaults.forEach(function (pair) {
        self.registerHandler(pair[0], pair[1]);
    });
}
/**
 * Resolves a post URL to the image URLs it refers to.
 *
 * URLs ending in ".jpg" or ".png" are returned as-is; anything else is handed
 * to the handler registered for the URL's hostname.
 *
 * @param {string} url - Link to resolve.
 * @returns {Promise<{images: string[], album: (string|undefined)}>} Whatever
 *     the matching handler produces, or `{ images: [url] }` for direct links.
 * @throws {Error} "No handler for <url>" when the URL cannot be parsed or no
 *     handler is registered for its hostname.
 */
UrlParser.prototype.parse = async(function (url) {
    //trivial case: the dot is escaped so only a real file extension matches
    if (/\.(jpg|png)$/.test(url)) {
        return Promise.resolve({ images: [url] });
    }
    var parsedUrl;
    try {
        parsedUrl = Url.parse(url);
    }
    catch (err) {
        throw new Error("No handler for " + url);
    }
    var host = parsedUrl.hostname;
    // own-property lookup so hostnames such as "constructor" cannot pick up
    // members inherited from Object.prototype; unregistered hosts hold undefined
    var handler = Object.prototype.hasOwnProperty.call(this.handlers, host)
        ? this.handlers[host]
        : undefined;
    if (typeof handler !== 'function') {
        throw new Error("No handler for " + url);
    }
    // handler failures propagate unchanged instead of being relabelled
    return handler(parsedUrl);
});
/**
 * Associates `handler` with `host`, replacing any handler already stored
 * under that hostname.
 */
UrlParser.prototype.registerHandler = function (host, handler) {
    var registry = this.handlers;
    registry[host] = handler;
};
/**
 * Removes the handler for `host` by overwriting its entry with undefined
 * (the key itself stays in the table).
 */
UrlParser.prototype.unregisterHandler = function (host) {
    var registry = this.handlers;
    registry[host] = void 0;
};
function imgur (parsedUrl) {
var imageHostName = "http://i.imgur.com";
var imgurAlbumApi = "https://api.imgur.com/3/album/";
var albumIdentifier = "/a/";
return new Promise(function (resolve, reject) {
var path = parsedUrl.path;
if (path.indexOf(albumIdentifier) === 0) {
var lastSlash = path.lastIndexOf("/")+1;
var id = path.substring(lastSlash, path.length);
var options = { url: imgurAlbumApi + id + "/images", headers: CONFIG.imgurHeader }
var json = await(getJson(options));
var images = [];
for (var i = 0; i < json.data.length; i++) {
images.push(json.data[i].link);
}
return resolve({ images: images, album: id });
}
else {
return resolve({ images: [imageHostName + path + JPG] });
}
});
};
function vidble (parsedUrl) {
var showIdentifier = "/show/";
var albumIdentifier = "/album/";
var vidibleHost = "http://www.vidble.com/";
return new Promise(function (resolve, reject) {
var path = parsedUrl.path;
if (path.indexOf(showIdentifier) === 0) {
var lastSlash = path.lastIndexOf("/") + 1;
var id = path.substring(lastSlash, path.length);
return resolve({ images: [vidibleHost + id + JPG] })
}
else if (path.indexOf(albumIdentifier) === 0) //C8CZwA3I?json=1
{
var lastSlash = path.lastIndexOf("/") + 1;
var id = path.substring(lastSlash, path.length);
var json = await(getJson(parsedUrl.href + "?json=1"));
var images = [];
for (var i = 0; i < json.pics.length; i++) {
images.push("http:" + json.pics[i]);
}
return resolve({ images: images, album: id });
}
return reject(parsedUrl);
});
};
function gfycat (parsedUrl) {
return new Promise((resolve,reject)=>{
try
{
var json = await(getJson("https://gfycat.com/cajax/get"+parsedUrl.path));
return resolve({images: [json.gfyItem.mp4Url]})
}
catch(err)
{
return reject(err);
}
})
}
// Public factory: UrlParser returns a new instance whether or not it is called with `new`.
exports.CreateUrlParser = UrlParser;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment