Skip to content

Instantly share code, notes, and snippets.

@jarib
Created July 12, 2014 13:57
Show Gist options
  • Select an option

  • Save jarib/e96a14573ec7b86a62a3 to your computer and use it in GitHub Desktop.

Select an option

Save jarib/e96a14573ec7b86a62a3 to your computer and use it in GitHub Desktop.
// Streaming XML parser (third-party `xml-stream` module) reading the document
// from standard input.
var XmlStream = require('xml-stream');
var stream = new XmlStream(process.stdin);

// Text of the most recently closed `page > title` and `page > id` elements.
// The revision handler copies these onto every record it emits.
var currentPageTitle = null;
var currentPageId = null;

// Prefix patterns tested against a revision's contributor IP.
var rx = {
  // Matches 85.88.64.* through 85.88.95.*
  storting: /^85\.88\.(6[4-9]|[7-8]\d|9[0-5])\./,
  // Matches 132.150.*.*
  regjering: /^132\.150\./
};

// Number of `page > id` elements seen so far; drives the progress output.
var count = 0;
// Track the id of the page currently being parsed and report progress.
stream.on('endElement: page > id', function (idNode) {
  count += 1;
  currentPageId = idNode.$text;

  // Progress goes to stderr so it never mixes with the JSON lines on stdout;
  // the running page count is printed after every fifth page.
  var isFifthPage = count % 5 === 0;
  if (isFifthPage) {
    process.stderr.write(count + '\n');
  }
});
// Remember the title of the page currently being parsed so that revisions
// closed afterwards can be labelled with it.
stream.on('endElement: page > title', function (titleNode) {
  var titleText = titleNode.$text;
  currentPageTitle = titleText;
});
/**
 * Emit one JSON line on stdout for every revision whose contributor has an
 * `ip` matching one of the `rx` patterns; every other revision is skipped.
 *
 * Emitted fields, in order: source, title, page_id, revision_id, timestamp
 * (milliseconds since the Unix epoch), contributor_ip, comment.
 */
stream.on('endElement: page > revision', function (revision) {
  // Guard the lookup: a revision without a `contributor` property would
  // otherwise throw a TypeError here and abort the whole run.
  // NOTE(review): whether xml-stream can actually omit the property (e.g. for
  // an empty/deleted contributor element) has not been verified — confirm.
  var contributor = revision.contributor;
  var ip = contributor && contributor.ip;
  if (!ip) {
    return;
  }

  // `test` only answers yes/no (no match array is built) and coerces its
  // argument to a string, so an unexpected non-string `ip` simply fails to
  // match instead of throwing.
  var source;
  if (rx.storting.test(ip)) {
    source = 'Stortinget';
  } else if (rx.regjering.test(ip)) {
    source = 'Regjeringskontorene';
  } else {
    return;
  }

  var rev = {
    source: source,
    title: currentPageTitle,
    // Unary plus converts the element text to a number.
    page_id: +currentPageId,
    revision_id: +revision.id,
    timestamp: new Date(revision.timestamp).getTime(),
    contributor_ip: ip,
    // JSON.stringify drops this key when the revision has no comment.
    comment: revision.comment
  };
  console.log(JSON.stringify(rev));
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment