Last active
January 2, 2016 06:49
-
-
Save 8bitDesigner/8265831 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version='1.0' encoding='utf-8'?>
<% /*
  Underscore template that renders the scraped forum comments as an Atom feed.

  Data it reads: `title` and `url` for the feed, and `entries`, an array of
  objects with `title`, `url` (or `link`), `id`, `published` (a Date),
  `author` and `html`.

  Text and attribute values use escaped interpolation so that characters
  such as ampersands or angle brackets cannot produce malformed XML. The
  comment body is emitted raw inside CDATA, so any CDATA terminator inside
  it is split across two CDATA sections.

  The feed-level id and the per-entry updated element are included because
  the Atom format requires them.
*/ %>
<feed xmlns='http://www.w3.org/2005/Atom'>
  <title><%- title %></title>
  <id><%- url %></id>
  <link rel='alternate' type='text/html' href='<%- url %>' />
  <updated><%= (new Date).toISOString() %></updated>
  <% entries.forEach(function(entry) { %>
  <entry>
    <title><%- entry.title %></title>
    <link rel='alternate' type='text/html' href='<%- entry.url || entry.link %>' />
    <id><%- entry.id %></id>
    <published><%= entry.published.toISOString() %></published>
    <updated><%= entry.published.toISOString() %></updated>
    <author>
      <name><%- entry.author %></name>
    </author>
    <content type='html' xml:lang='en'>
      <![CDATA[ <%= String(entry.html || '').split(']]>').join(']]]]><![CDATA[>') %> ]]>
    </content>
  </entry>
  <% }) %>
</feed>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var jsdom = require('jsdom')
  , http = require('http')
  , async = require('async')
  , _ = require('underscore')
  , port = process.env.PORT || 5000
    // Resolve the template next to this script so the server also starts
    // when launched from a different working directory.
  , feedXml = require('fs').readFileSync(require('path').join(__dirname, 'feed.xml')).toString()
    // Compile the template once instead of on every request.
  , renderFeed = _.template(feedXml)

// Forum topic whose comments are republished as an Atom feed.
var url = 'http://help.octgn.net/support/discussions/topics/46055'

/**
 * HTTP server: every request scrapes all pages of the topic at `url`,
 * merges the comments found on them and answers with the rendered feed.
 * Any failure is answered with a 500 and a plain-text message.
 */
http.createServer(function(req, res) {
  // Turn whatever the scrapers report (an Error, an array of errors, a
  // string) into text; res.end() only accepts strings and buffers.
  function describe(err) {
    return err && err.message ? err.message : String(err)
  }

  function bail(err) {
    var msg = Array.isArray(err) ? err.map(describe).join('\n') : describe(err)
    res.writeHead(500, { 'Content-Type': 'text/plain; charset=utf-8' })
    res.end(msg)
  }

  fetchPages(url, function(err, pages) {
    if (err) { return bail(err) }

    async.map(pages, fetchFeed, function(err, feeds) {
      if (err) { return bail(err) }

      var response = {
        title: 'Mac test client',
        url: url,
        // One array of entries per page; flatten a single level only.
        entries: _.flatten(feeds, true)
      }

      // Rendering can throw (e.g. toISOString() on an invalid date); an
      // uncaught throw in this callback would take the whole process down.
      var body
      try {
        body = renderFeed(response)
      } catch (renderError) {
        return bail(renderError)
      }

      res.writeHead(200, { 'Content-Type': 'application/atom+xml; charset=utf-8' })
      res.end(body, 'utf8')
    })
  })
}).listen(port)

console.log('Listening on port '+port)
/**
 * Loads one page of the forum topic and converts every `.user-comment`
 * element on it into a feed entry.
 *
 * @param {string} url - Address of the page to scrape.
 * @param {function(*, Array=)} cb - Node-style callback. Receives the jsdom
 *   errors (or an Error when jQuery is unavailable) as first argument, or
 *   `null` and the array of entries. Each entry has `title`, `author`,
 *   `published` (Date), `link`, `url` (same value as `link`), `id` and
 *   `html`.
 */
function fetchFeed(url, cb) {
  jsdom.env({
    url: url,
    scripts: ["http://code.jquery.com/jquery.js"],
    done: function (errors, window) {
      // Check for failure before touching `window`: reading a property of a
      // missing window would throw instead of reporting the real error.
      if (errors) { return cb(errors) }

      var $ = window && window.jQuery
      if (!$) { return cb(new Error('jQuery is not available for ' + url)) }

      var entries = $('.user-comment').toArray().map(function(node) {
        var comment = $(node)
          , anchor = comment.attr('id') || ''
          , link = anchor ? url + '#' + anchor : url
          , text = comment.find('.p-content').text().trim()
          , body = comment.find('.p-content .p-desc').html() || ''

        return {
          // Only mark the title as shortened when it really was.
          title: text.length > 50 ? text.substring(0, 50) + '...' : text,
          author: comment.find('.user-name').text().trim(),
          published: new Date(comment.find('.timeago').data('timeago')),
          link: link,
          // The feed template reads `url`; `link` is kept for existing users.
          url: link,
          // Last dash-separated part of the element id.
          id: anchor.split('-').pop(),
          html: body.trim()
        }
      })

      return cb(null, entries)
    }
  })
}
/**
 * Loads the first page of the topic and works out the addresses of all of
 * its pages from the numbered links inside `.pagination`.
 *
 * @param {string} url - Address of the topic (its first page).
 * @param {function(*, Array.<string>=)} cb - Node-style callback. Receives
 *   the jsdom errors (or an Error when jQuery is unavailable) as first
 *   argument, or `null` and the list of page addresses. The list always
 *   starts with `url` itself and contains each address once.
 */
function fetchPages(url, cb) {
  jsdom.env({
    url: url,
    scripts: ["http://code.jquery.com/jquery.js"],
    done: function (errors, window) {
      // Check for failure before touching `window`: reading a property of a
      // missing window would throw instead of reporting the real error.
      if (errors) { return cb(errors) }

      var $ = window && window.jQuery
      if (!$) { return cb(new Error('jQuery is not available for ' + url)) }

      var urls = [url]
        , seen = Object.create(null)
      seen[url] = true

      $('.pagination').find('a').toArray().forEach(function(node) {
        var anchor = $(node)
          , pageNumber = parseInt(anchor.text(), 10)

        // Keep only real links whose label is a page number; this drops
        // entries such as "next"/"previous" and placeholders without href.
        if (!anchor.attr('href') || isNaN(pageNumber)) { return }

        // Build the address from the parsed number so stray whitespace in
        // the label cannot end up in the URL.
        var pageUrl = url + '/page/' + pageNumber

        // A link that appears twice would otherwise be scraped twice and
        // produce duplicate feed entries.
        if (seen[pageUrl]) { return }
        seen[pageUrl] = true
        urls.push(pageUrl)
      })

      return cb(null, urls)
    }
  })
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "forum-to-rss-scraper", | |
"version": "0.0.0", | |
"description": "Script to pull a forum topic into an RSS feed via screen scraping", | |
"main": "index.js", | |
"dependencies": { | |
"jsdom": "~0.8.10", | |
"underscore": "~1.5.2", | |
"async": "~0.2.9" | |
}, | |
"engines": { | |
"node": "0.10.x" | |
}, | |
"author": "Paul Sweeney <[email protected]>", | |
"license": "BSD" | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
web: node index.js |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment