Last active
February 1, 2016 04:44
-
-
Save fero23/dc104f6deb891559b8f6 to your computer and use it in GitHub Desktop.
Random RSS blog feed aggregator for my web novel needs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
var cheerio = require("cheerio"); | |
var request = require("request"); | |
var _ = require("lodash"); | |
var parseString = require('xml2js').parseString; | |
var ProgressWatcher = require("./progress_watcher"); | |
// Base URLs of the sites to scrape, grouped by blogging platform. The request
// handler below switches on the platform key to pick the feed path and the
// scrapper class used for each URL.
const urls = {
    drupal: ["http://skythewoodtl.com"],
    wordpress: [
        "https://shintranslations.com",
        "http://circustranslations.com",
        "https://isekailunatic.wordpress.com",
        "http://arkmachinetranslations.com",
        "http://trinityarchive.com",
        "https://infinitenoveltranslations.wordpress.com",
        "http://japtem.com"
    ],
    blogspot: ["http://www.sousetsuka.com", "http://thelordofpie.blogspot.com"]
};
// Titles of articles that the scrappers leave out of the compiled feed.
const bannedArticles = ["Welcome to Amsterdam", "Schedule for TNG"];
module.exports = function (req, res, next) { | |
let urlCount = 0; | |
for (let prop in urls) { | |
urlCount += urls[prop].length; | |
} | |
let pw = new ProgressWatcher(urlCount); | |
for (let prop in urls) { | |
switch (prop) { | |
case "wordpress": | |
urls[prop].forEach(url => new RSSScrapper(url + "/feed").scrap(pw)); | |
break; | |
case "blogspot": | |
urls[prop].forEach(url => new AtomScrapper(url + "/feeds/posts/default").scrap(pw)); | |
break; | |
case "drupal": | |
urls[prop].forEach(url => new RSSScrapper(url + "/rss.xml").scrap(pw)); | |
break; | |
} | |
} | |
pw.onComplete(articles => { | |
let sorted = _(articles).chain().flatten().sortBy("pubDate").reverse().value(); | |
if (_.some(sorted, article => article.pubDate.toString == "Invalid Date")) { | |
console.warn("Some articles have problems with dates"); | |
} | |
res.set('Content-Type', 'text/xml').render("rss", { | |
articles: sorted | |
}); | |
}); | |
} | |
/**
 * Reports a failure for one feed through `console.error`.
 *
 * @param {string} url - URL of the feed that failed.
 * @param {Error} err - The failure; only its `message` is printed.
 */
function printError(url, err) {
    const report = `Error on ${url}:\n${err.message}`;
    console.error(report);
}
/**
 * Scrapper for RSS feeds: downloads a feed, converts its items to article
 * objects and hands them to a progress watcher.
 */
class RSSScrapper {
    /**
     * @param {string} url - Full URL of the RSS feed.
     */
    constructor(url) {
        this.url = url;
    }

    /**
     * Downloads and parses the feed, then reports to `pw` exactly once.
     *
     * On success `pw.progress` receives an array of
     * `{ title, link, description, pubDate }` objects, without the items whose
     * title is listed in `bannedArticles`. On a download, parse or extraction
     * failure the error is logged with `printError` and `pw.progress([])` is
     * called, so the watcher still counts this feed.
     *
     * @param {object} pw - Watcher whose `progress(articles)` method is called.
     */
    scrap(pw) {
        request(this.url, (requestErr, res, body) => {
            if (requestErr) {
                printError(this.url, requestErr);
                pw.progress([]);
                return;
            }
            parseString(body, (parseErr, result) => {
                let articles;
                try {
                    // Surface the parser's own error instead of failing later
                    // with a TypeError on an undefined result.
                    if (parseErr) {
                        throw parseErr;
                    }
                    articles = result.rss.channel[0].item
                        .filter(item => bannedArticles.indexOf(item.title[0]) === -1)
                        .map(item => ({
                            title: item.title[0],
                            link: item.link[0],
                            description: item.description[0],
                            pubDate: new Date(item.pubDate[0])
                        }));
                }
                catch (err) {
                    printError(this.url, err);
                    articles = [];
                }
                // Reported outside the extraction try block so that a throwing
                // watcher callback is logged but never causes a second
                // progress() call for the same feed.
                try {
                    pw.progress(articles);
                }
                catch (err) {
                    printError(this.url, err);
                }
            });
        });
    }
}
/**
 * Scrapper for Atom feeds: downloads a feed, converts its entries to article
 * objects and hands them to a progress watcher.
 */
class AtomScrapper {
    /**
     * @param {string} url - Full URL of the Atom feed.
     */
    constructor(url) {
        this.url = url;
    }

    /**
     * Downloads and parses the feed, then reports to `pw` exactly once.
     *
     * On success `pw.progress` receives an array of
     * `{ title, link, description, pubDate }` objects, without the entries whose
     * title is listed in `bannedArticles`. The description is the first 100
     * whitespace-separated words of the entry content's text, followed by
     * " [...]". On a download, parse or extraction failure the error is logged
     * with `printError` and `pw.progress([])` is called.
     *
     * @param {object} pw - Watcher whose `progress(articles)` method is called.
     */
    scrap(pw) {
        // A parsed element is either a bare string or an object carrying its
        // text under `_`; accept both so titles compare as plain strings.
        const textOf = node => (typeof node === "string" ? node : node._);

        request(this.url, (requestErr, res, body) => {
            if (requestErr) {
                printError(this.url, requestErr);
                pw.progress([]);
                return;
            }
            parseString(body, (parseErr, result) => {
                let articles;
                try {
                    // Surface the parser's own error instead of failing later
                    // with a TypeError on an undefined result.
                    if (parseErr) {
                        throw parseErr;
                    }
                    articles = [];
                    result.feed.entry.forEach(entry => {
                        const title = textOf(entry.title[0]);
                        // Compare the title text, not the title node: the node
                        // is an object here and never equals a banned string.
                        if (bannedArticles.indexOf(title) !== -1) {
                            return;
                        }
                        // NOTE(review): takes the first link typed text/html
                        // whatever its `rel` is — confirm that is the post URL.
                        const htmlLink = entry.link.filter(l => l.$.type === "text/html")[0];
                        const text = cheerio.load("<html><body>" +
                            textOf(entry.content[0]) + "</body></html>")("body").text();
                        const words = text.match(/[^\s]+/g) || [];
                        articles.push({
                            title: title,
                            link: htmlLink.$.href,
                            description: words.slice(0, 100).join(" ") + " [...]",
                            pubDate: new Date(entry.published[0])
                        });
                    });
                }
                catch (err) {
                    printError(this.url, err);
                    articles = [];
                }
                // Reported outside the extraction try block so that a throwing
                // watcher callback is logged but never causes a second
                // progress() call for the same feed.
                try {
                    pw.progress(articles);
                }
                catch (err) {
                    printError(this.url, err);
                }
            });
        });
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
module.exports = class { | |
constructor(count) { | |
this.count = count; | |
this.current = 0; | |
this.results = []; | |
this.onCompleteCbArr = []; | |
this.onErrorCbArr = []; | |
this.finallyCbArr = []; | |
} | |
progress(obj) { | |
if (obj instanceof Error) { | |
this.error = obj; | |
} | |
else { | |
this.results.push(obj); | |
} | |
if (!this.error) { | |
if (this.error) { | |
this.onErrorCbArr.forEach(cb => cb(this.error)); | |
this.finallyCbArr.forEach(cb => cb()); | |
} | |
else if (++this.current == this.count) { | |
this.onCompleteCbArr.forEach(cb => cb(this.results)); | |
this.finallyCbArr.forEach(cb => cb()); | |
} | |
} | |
} | |
hasError() { | |
return this.error !== null && this.error !== undefined; | |
} | |
onError(cb) { | |
this.onErrorCbArr.push(cb); | |
} | |
onComplete(cb) { | |
this.onCompleteCbArr.push(cb); | |
} | |
finally(cb) { | |
this.finallyCbArr.push(cb); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
doctype xml
//- RSS 2.0 document for the compiled feed. Expects a local `articles`: a list
//- of objects with `title`, `link`, `description` and a Date `pubDate`.
rss(version='2.0')
  channel
    title Novel RSS Compilator
    description Compilator of the translator websites' RSS feed of my favorite novels.
    //- One <item> per article, in the order the list was given.
    each article in articles
      item
        title= article.title
        link= article.link
        description= article.description
        pubDate= article.pubDate.toUTCString()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment