Created
November 29, 2011 17:56
-
-
Save benbuckman/1405720 to your computer and use it in GitHub Desktop.
Node.js script to parse Drupal logs in the Linux syslog (and find distinct 404'd URLs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Drupal log parser w/ node.js
// Takes a filtered syslog file (one pipe-delimited Drupal log entry per line)
// and prints the distinct URLs of its "page not found" entries.
// Run as `node drupal-syslog-parser.js LOGPATH`
// [install dependencies (lazy, underscore) first with `npm install ___`]

var lazy = require('lazy')
  , fs = require('fs')
  , path = require('path')
  , _ = require('underscore');

// Names given to the pipe-delimited fields of a log line, by position.
// [?] need to confirm
// NOTE(review): Drupal's stock syslog format appears to end with
// uid|link|message rather than severity|message|link -- verify against the
// site's syslog_format setting. Only `type` and `url` are relied on below.
var fieldNames = ['heading', 'timestamp', 'type', 'ip', 'url', 'referer', 'severity', 'message', 'link'];

// `type` value that marks a 404 entry.
var NOT_FOUND_TYPE = 'page not found';

// existsSync moved from `path` to `fs` and was later dropped from `path`;
// prefer the fs version and fall back only on very old node releases.
var existsSync = fs.existsSync || path.existsSync;

/**
 * Turn one pipe-delimited log line into an object keyed by `fieldNames`.
 * Fields missing from the line are set to null; extra fields are ignored.
 *
 * @param {string} line - a single raw log line
 * @returns {Object} map of field name to string value (or null)
 */
function parseLogLine(line) {
  var fields = line.split('|');
  var entry = {};
  _.each(fieldNames, function (name, position) {
    entry[name] = _.isUndefined(fields[position]) ? null : fields[position];
  });
  return entry;
}

/**
 * Collect the distinct, sorted URLs of all "page not found" entries.
 * Entries without a URL are skipped so they don't show up as blank rows.
 *
 * @param {Object[]} entries - objects produced by parseLogLine
 * @returns {string[]} sorted list of unique URLs
 */
function findMissingUrls(entries) {
  var notFound = _.filter(entries, function (entry) {
    return entry.type === NOT_FOUND_TYPE;
  });
  var urls = _.uniq(_.compact(_.pluck(notFound, 'url')));
  urls.sort();
  return urls;
}

// The log path is the first argument after `node <script>`.
var logPath = process.argv[2];

if (!logPath) {
  console.error("No log path param.");
  process.exit(1);
}

if (!existsSync(logPath)) {
  console.error("Can't find log file ", logPath);
  process.exit(1);
}

// loads the whole file first, bad for memory, but unclear how else
fs.readFile(logPath, function (err, data) {
  if (err) throw err;

  // Drop blank lines (including the empty string left after a trailing
  // newline) so they are neither counted nor parsed into all-null records.
  var lines = _.filter(data.toString().split(/\r?\n/), function (line) {
    return line.trim() !== '';
  });
  console.log("Found %d lines", lines.length);

  var logs = _.map(lines, parseLogLine);

  // == at this point, logs contains array of log objects ==
  console.log("Parsed %d log lines", logs.length);

  // find the unique URLs in 404s
  var missingUrls = findMissingUrls(logs);

  console.log("%d distinct missing URLs\n", missingUrls.length);
  console.log(missingUrls.join("\n"));
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment