Created
October 2, 2012 06:42
-
-
Save devongovett/3816875 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * RSS Subscription Count Node.js Proxy Server
 * Devon Govett
 * Public Domain
 *
 * This script is a simple proxy server for my blog's RSS feed that keeps track of requests
 * to the feed and attempts to guess a subscriber count.
 * Google Reader represents the majority of my subscriber base, and they give you actual
 * subscriber numbers when their spider requests your feed. Other aggregators do something similar.
 * Otherwise, this script tracks unique IP addresses over a 24 hour period.
 *
 * Data is saved on an hourly basis into data.json, as well as when the script is about to exit.
 * When it is relaunched, it attempts to restore the information in that file.
 * Once per day, a log of the subscriber information for that day is appended to subscribers.txt.
 *
 * Hopefully this script is of use to others. I'm sure it can be improved, so feel free to fork away!
 */
// configuration
// USERNAME and PASSWORD protect the /stats page (HTTP basic auth);
// change the placeholder values before deploying.
var USERNAME = 'username';
var PASSWORD = 'password';

// The real feed that requests to `/` are proxied to.
var RSS_URL = 'http://example.com/feed.xml';

var express = require('express');
var request = require('request');
var fs = require('fs');
// Number of milliseconds in 24 hours.
var ONE_DAY = 1000 * 60 * 60 * 24;

// Returns a fresh, empty subscriber data structure.
function emptyData() {
  return {
    ips: {},          // client address -> 1
    googleReader: {}, // feed-id -> subscriber count
    otherAgs: {}      // aggregator user agent -> subscriber count
  };
}

// Coerces whatever was parsed from data.json into the expected shape.
// Any of the three sections that is missing or is not a plain object is
// replaced with an empty one, so the request handlers can always assign
// into data.ips / data.googleReader / data.otherAgs without throwing.
function normalizeData(parsed) {
  var result = emptyData();
  if (parsed && typeof parsed === 'object') {
    for (var key in result) {
      var section = parsed[key];
      if (section && typeof section === 'object' && !Array.isArray(section)) {
        result[key] = section;
      }
    }
  }
  return result;
}

var data = emptyData();

// try reading current subscriber data from the file
try {
  var stat = fs.statSync('data.json');
  data = normalizeData(JSON.parse(fs.readFileSync('data.json', 'utf8')));

  // clear IPs if too old
  if (stat.mtime.getTime() < Date.now() - ONE_DAY) {
    data.ips = {};
  }
} catch (e) {
  // A missing file just means this is the first run. Anything else
  // (unreadable file, corrupt JSON) is reported, and we start from empty data.
  if (e.code !== 'ENOENT') {
    console.error('Could not restore data.json, starting fresh: ' + e.message);
  }
}
var app = express();

/*
 * GET / -- records who is asking for the feed, then proxies the real feed.
 *
 * Each request is counted in exactly one of three ways, depending on the
 * User-Agent header:
 *   1. "<n> subscribers; feed-id=<id>"  -> data.googleReader[id] = n
 *   2. "<n> subscribers" / "<n> readers" -> data.otherAgs[<UA minus the count>] = n
 *   3. anything else                     -> data.ips[<client address>] = 1
 */
app.get('/', function(req, res) {
  var userAgent = req.headers['user-agent'] || '';
  var countPattern = /([0-9]+) (?:subscribers?|readers?)/i;
  var match;

  // Google Reader sends subscriber count in the User-Agent header
  // It also sends a feed-id parameter for uniqueness
  // see http://support.google.com/reader/bin/answer.py?hl=en&answer=70001
  if ((match = userAgent.match(/([0-9]+) subscribers?; feed-id=([0-9]+)/i))) {
    data.googleReader[match[2]] = +match[1];
  }

  // Some other aggregators send the subscribers but no feed-id
  else if ((match = userAgent.match(countPattern))) {
    // Key on the user agent with the count removed, so a later request from
    // the same aggregator overwrites its entry instead of adding a new one.
    data.otherAgs[userAgent.replace(countPattern, '')] = +match[1];
  }

  // Otherwise, track unique IP addresses
  else {
    // use the x-forwarded-for header if you're behind a reverse proxy like I am.
    // The header may hold a comma-separated chain ("client, proxy1, proxy2");
    // only the first entry is used so one client is not counted once per chain.
    var forwarded = req.headers['x-forwarded-for'];
    var address = forwarded && forwarded.split(',')[0].trim();
    data.ips[address || req.connection.remoteAddress] = 1;
  }

  // Send the actual feed
  var upstream = request(RSS_URL);

  // Without a listener, an 'error' event from the upstream request (DNS
  // failure, refused connection, ...) would be thrown and take the server down.
  upstream.on('error', function(err) {
    console.error('Failed to fetch ' + RSS_URL + ': ' + err.message);
    if (!res.headersSent) {
      res.statusCode = 502;
    }
    res.end();
  });

  req.pipe(upstream).pipe(res);
});
// Right-aligns a value in a 7-character column by left-padding with spaces.
// Values that are already 7 or more characters wide are returned in full
// rather than being cut down to their last 7 characters.
function pad(s) {
  var width = 7;
  var text = String(s);
  if (text.length >= width) {
    return text;
  }
  // new Array(width + 1).join(' ') yields exactly `width` spaces.
  return (new Array(width + 1).join(' ') + text).slice(-width);
}
// Sums the numeric values of an object's own enumerable properties.
// Values are coerced with Number() so that counts stored as strings still add
// arithmetically; values that are not finite numbers are skipped.
// Returns 0 for an empty, null or undefined object.
function sum(o) {
  var total = 0;
  for (var k in o) {
    // ignore anything inherited through the prototype chain
    if (!Object.prototype.hasOwnProperty.call(o, k)) {
      continue;
    }
    var n = Number(o[k]);
    if (isFinite(n)) {
      total += n;
    }
  }
  return total;
}
// stats page (up to the minute)
// GET /stats -- plain-text table of the current counts, behind HTTP basic auth.
app.get('/stats', express.basicAuth(USERNAME, PASSWORD), function(req, res) {
  var googleReader = sum(data.googleReader);
  var otherAgs = sum(data.otherAgs);
  var direct = sum(data.ips);
  var total = googleReader + otherAgs + direct;

  // Labels are padded to the width of the longest one ('Other aggregators: ',
  // 19 characters) so that label + 7-character number fills the 26-character
  // rule and the right-aligned numbers line up in one column.
  var LABEL_WIDTH = 19;
  var rule = '--------------------------\n';

  function row(label, value) {
    var padded = (label + new Array(LABEL_WIDTH + 1).join(' ')).slice(0, LABEL_WIDTH);
    return padded + pad(value);
  }

  res.type('text');
  res.write('Subscribers For Today:\n');
  res.write(rule);
  res.write(row('Google Reader: ', googleReader) + '\n');
  res.write(row('Other aggregators: ', otherAgs) + '\n');
  res.write(row('Direct: ', direct) + '\n');
  res.write(rule);
  res.write(row('TOTAL: ', total));
  res.end();
});
// clear the unique IP addresses and write subscriber logs once per day
setInterval(function() {
  var googleReader = sum(data.googleReader);
  var otherAgs = sum(data.otherAgs);
  var direct = sum(data.ips);
  var total = googleReader + otherAgs + direct;

  var line =
    new Date() +
    ' -- Google Reader: ' + googleReader +
    ', Other aggregators: ' + otherAgs +
    ', Direct: ' + direct +
    ', TOTAL: ' + total +
    '\n';

  // write subscriber logs
  // appendFileSync opens, writes and closes in one call, so no descriptor can
  // be left open on failure. A failed write is reported instead of being
  // thrown out of the timer callback, which would terminate the server.
  try {
    fs.appendFileSync('subscribers.txt', line);
  } catch (err) {
    console.error('Could not append to subscribers.txt: ' + err.message);
  }

  // clear IPs so the next 24 hour window starts from zero
  data.ips = {};
}, ONE_DAY);
// write the current data to a file once an hour, and when the program exits

// Serializes `data` to data.json. The JSON is written to a temporary file
// first and then renamed over data.json, so an interrupted write cannot leave
// a truncated data.json behind. Throws if either fs call fails.
function write() {
  var tmp = 'data.json.tmp';
  fs.writeFileSync(tmp, JSON.stringify(data));
  fs.renameSync(tmp, 'data.json');
}

// Like write(), but reports a failure instead of throwing, so a full disk or
// a permissions problem does not take the server down from a timer callback
// or prevent a clean exit.
function safeWrite() {
  try {
    write();
  } catch (err) {
    console.error('Could not save data.json: ' + err.message);
  }
}

setInterval(safeWrite, 1000 * 60 * 60);

// Saves the current data, then exits.
function shutdown() {
  safeWrite();
  process.exit();
}

// SIGINT is Ctrl-C; SIGTERM is what `kill` and most process managers send.
process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);

app.listen(3000);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment