Created
October 13, 2011 20:44
-
-
Save Tatsh/1285466 to your computer and use it in GitHub Desktop.
Crawl a site and make an RSS feed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Node core HTTP module: used for the outgoing page fetch and the local feed server.
var http = require('http');
// CSS-selector lookup over the DOM tree that htmlparser produces.
var select = require('soupselect').select;
// HTML parser that turns the fetched page into a DOM tree.
var htmlparser = require('htmlparser');
// User-Agent header value sent with the outgoing page request.
var ua = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.83 Safari/535.2';
/**
 * Collect `{title, downloadUrl}` entries from the links matched on the browse page.
 *
 * A link whose first child is a `<b>` node is treated as a title; the link
 * immediately after it supplies the download href. Links that do not fit this
 * shape (no children, no following link, no href) are skipped instead of
 * raising a TypeError.
 *
 * @param {Array} links Nodes returned by the selector lookup.
 * @param {string} host Host name used to build absolute download URLs.
 * @param {number} startIndex Index of the first link to inspect.
 * @returns {Array} List of `{title, downloadUrl}` objects.
 */
var extractTorrents = function (links, host, startIndex) {
  var torrents = [];
  for (var i = startIndex; i < links.length; i++) {
    var label = links[i].children && links[i].children[0];
    if (!label || label.raw !== 'b') {
      continue;
    }
    var text = label.children && label.children[0];
    var next = links[i + 1];
    if (!text || !next || !next.attribs || !next.attribs.href) {
      continue;
    }
    torrents.push({
      title: text.raw,
      downloadUrl: 'http://' + host + '/' + next.attribs.href
    });
    // The following link was consumed as this entry's download link.
    i++;
  }
  return torrents;
};

/**
 * Fetch `/browse.php` from the hard-coded host and extract its torrent list.
 *
 * The request carries the hard-coded Cookie header and the module-level `ua`
 * User-Agent. The response body is parsed with htmlparser and the links matched
 * by `td[align="left"] a` are turned into `{title, downloadUrl}` entries.
 *
 * @param {function(Array, ?Error)} [callback] Called at most once. Receives the
 *   torrent list and `null` on success, or an empty list and the error when the
 *   request, the response stream or the parse fails. Optional.
 */
var updateFromTheSite = function (callback) {
  var cookies = 'uid=0000; pass=0000000000000000';
  var url = 'thedomain.com';
  // The first three matched links are never inspected.
  // NOTE(review): presumably page navigation/header links - confirm against the real markup.
  var firstLinkIndex = 3;
  var finished = false;

  // Single exit point: guarantees the callback fires once, and only if it is callable.
  var finish = function (torrents, error) {
    if (finished) {
      return;
    }
    finished = true;
    if (typeof callback === 'function') {
      callback(torrents, error || null);
    }
  };

  // http.request replaces the removed http.createClient API.
  var request = http.request({
    host: url,
    port: 80,
    method: 'GET',
    path: '/browse.php',
    headers: {'Host': url, 'Cookie': cookies, 'User-Agent': ua}
  });

  request.on('response', function (response) {
    response.setEncoding('utf8');
    var body = '';
    response.on('data', function (chunk) {
      body += chunk;
    });
    response.on('error', function (error) {
      finish([], error);
    });
    response.on('end', function () {
      var handler = new htmlparser.DefaultHandler(function (error, dom) {
        if (error) {
          // Report instead of throwing: a throw here cannot be caught by the caller.
          finish([], error);
          return;
        }
        var links = select(dom, 'td[align="left"] a');
        finish(extractTorrents(links, url, firstLinkIndex), null);
      });
      var parser = new htmlparser.Parser(handler);
      parser.parseComplete(body);
    });
  });

  // Without this listener a connection failure is an unhandled 'error' event.
  request.on('error', function (error) {
    finish([], error);
  });

  request.end();
};
/**
 * Escape the XML special characters so arbitrary text can be placed inside
 * element content without producing a malformed document.
 *
 * @param {*} value Value to embed; converted with String().
 * @returns {string} Escaped text.
 */
var escapeXml = function (value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
};

/**
 * Format a date as `YYYY-MM-DD HH:MM:SS`, example: 2011-10-13 19:31:30.
 * Every component is read in UTC so the date and time parts agree.
 *
 * @param {Date} d Date to format.
 * @returns {string} Formatted timestamp.
 */
var formatUtcTimestamp = function (d) {
  var pad = function (n) {
    return n < 10 ? '0' + n : String(n);
  };
  return d.getUTCFullYear() + '-' + pad(d.getUTCMonth() + 1) + '-' + pad(d.getUTCDate()) +
    ' ' + pad(d.getUTCHours()) + ':' + pad(d.getUTCMinutes()) + ':' + pad(d.getUTCSeconds());
};

/**
 * Render the RSS 0.91 document for a list of torrents.
 *
 * @param {Array} torrents Objects with `title` and `downloadUrl`.
 * @param {string} added Timestamp text placed in every item description.
 * @returns {string} Complete XML document.
 */
var buildFeedXml = function (torrents, added) {
  var xml = '<?xml version="1.0" encoding="utf-8"?>';
  xml += '<rss version="0.91">';
  xml += '<channel><title>The Site</title><link>http://thedomain.com</link>';
  xml += '<description></description>';
  (torrents || []).forEach(function (obj) {
    xml += '<item>';
    xml += '<title>' + escapeXml(obj.title) + '</title>';
    // Download URLs may carry query strings; a raw '&' would break the XML.
    xml += '<link>' + escapeXml(obj.downloadUrl) + '</link>';
    xml += '<description>Added: ' + escapeXml(added) + '</description>';
    xml += '</item>';
  });
  xml += '</channel>';
  xml += '</rss>';
  return xml;
};

// Local feed endpoint: every request triggers a fresh fetch of the site and
// answers with the resulting RSS document.
http.createServer(function (req, res) {
  res.writeHead(200, {'Content-Type': 'application/xml'});
  // "Added" is the moment this feed was requested, captured before the fetch starts.
  var added = formatUtcTimestamp(new Date());
  updateFromTheSite(function (torrents) {
    res.end(buildFeedXml(torrents, added));
  });
}).listen(22221, '127.0.0.1');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment