Created
July 21, 2010 04:48
-
-
Save polotek/484083 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Testing SAX push parsing
 *
 * The output file should be almost identical
 * to the input file.
 *
 * Known issues with this example output:
 *
 * - Doesn't escape entities. You can do it
 *   manually on the character content.
 * - Doesn't replicate self-closing tags.
 *   They get an end element instead.
 **/
var sys = require('sys') | |
, fs = require('fs') | |
, libxmljs = require('libxmljs'); | |
// Start timestamp; subtracted from the current time once the input
// stream ends in order to report the elapsed milliseconds.
var d = new Date();

// Source document: read as utf8 text from input.xml.
var infile = fs.createReadStream('input.xml', { encoding: 'utf8' });

// Destination document: the re-serialized XML is written to output.xml.
var outfile = fs.createWriteStream('output.xml', { encoding: 'utf8' });
/**
 * Builds a possibly-prefixed XML name.
 *
 * @param {?string} prefix - namespace prefix, or a falsy value for none
 * @param {string} localName - local part of the name
 * @returns {string} `prefix:localName`, or just `localName` without a prefix
 */
function qualifiedName(prefix, localName) {
  return (prefix ? prefix + ':' : '') + localName;
}

/**
 * SAX event handlers that re-serialize the parsed document to `outfile`.
 *
 * Each key is the name of a parser callback; the handlers write the
 * corresponding markup to the output stream as events arrive. Text,
 * CDATA, comment and attribute values are written as received, without
 * any entity escaping.
 */
var funcs = {
  /** Announces the start and writes the XML declaration. */
  onStartDocument: function() {
    console.log('starting...');
    outfile.write('<?xml version="1.0" encoding="utf-8"?>\n');
  },

  /** Closes the output stream once the document has been fully parsed. */
  onEndDocument: function() {
    outfile.end();
    console.log('done.');
  },

  /**
   * Writes an opening tag with its attributes and namespace declarations.
   *
   * @param {string} elem - local element name
   * @param {Array} attrs - attribute tuples, indexed here as
   *     [name, prefix, uri, value]
   * @param {?string} prefix - element namespace prefix, if any
   * @param {?string} uri - element namespace URI (not used for output)
   * @param {Array} namespaces - namespace tuples, indexed here as
   *     [prefix, uri]
   */
  onStartElementNS: function(elem, attrs, prefix, uri, namespaces) {
    var attributes = attrs || [];
    var declarations = namespaces || [];
    var out = ['<' + qualifiedName(prefix, elem)];
    var i, len, attr, nsPrefix;

    for (i = 0, len = attributes.length; i < len; i++) {
      attr = attributes[i];
      out.push(' ' + qualifiedName(attr[1], attr[0]) +
               '="' + (attr[3] || '') + '"');
    }

    for (i = 0, len = declarations.length; i < len; i++) {
      nsPrefix = declarations[i][0];
      // A declaration without a prefix is the default namespace and must be
      // written as xmlns="...", not xmlns:null="...".
      out.push(' ' + (nsPrefix ? 'xmlns:' + nsPrefix : 'xmlns') +
               '="' + (declarations[i][1] || '') + '"');
    }

    out.push('>');
    outfile.write(out.join(''));
  },

  /**
   * Writes a closing tag.
   *
   * @param {string} elem - local element name
   * @param {?string} prefix - element namespace prefix, if any
   * @param {?string} uri - element namespace URI (not used for output)
   */
  onEndElementNS: function(elem, prefix, uri) {
    outfile.write('</' + qualifiedName(prefix, elem) + '>');
  },

  /** Writes character data as-is; a falsy value is written as ''. */
  onCharacters: function(chars) {
    outfile.write(chars || '');
  },

  /** Writes the content wrapped in a CDATA section. */
  onCdata: function(chars) {
    outfile.write('<![CDATA[');
    outfile.write(chars || '');
    outfile.write(']]>');
  },

  /** Writes the content wrapped in an XML comment. */
  onComment: function(chars) {
    outfile.write('<!--');
    outfile.write(chars || '');
    outfile.write('-->');
  },

  /** Forwards parser warnings to the console. */
  onWarning: function(warning) {
    console.warn(warning);
  },

  /** Reports parser errors on the console with an 'ERROR: ' prefix. */
  onError: function(error) {
    console.error('ERROR: ' + error);
  }
};
// Create the push parser. The setup function receives a registration
// object `cb`; each handler in `funcs` is installed by calling the method
// on `cb` that has the same name as the handler's key.
var p = new libxmljs.SaxPushParser(function (cb) {
  Object.keys(funcs).forEach(function (name) {
    cb[name](funcs[name]);
  });
});
// Feed every non-empty chunk read from the input stream into the parser.
infile.on('data', function forwardChunk(chunk) {
  if (!chunk) {
    return;
  }
  p.push(chunk);
});

// When the input is exhausted, report the time elapsed since `d` was taken.
infile.on('end', function reportElapsed() {
  var elapsed = new Date() - d;
  console.log('TIME: ' + elapsed + ' ms');
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment