Skip to content

Instantly share code, notes, and snippets.

@shinaisan
Last active May 16, 2017 03:39
Show Gist options
  • Save shinaisan/8fe33f6d179a3deef6114c2f8f1b483d to your computer and use it in GitHub Desktop.
Save shinaisan/8fe33f6d179a3deef6114c2f8f1b483d to your computer and use it in GitHub Desktop.
Node.js xmldom whitespace stripping sample
node_modules
<table>
<tr><td>Alpha<!-- COMMENT --></td><td>&#x0391;</td><td>U+0391</td></tr>
<tr><td>Beta</td><td>&#x0392;</td><td>U+0392</td></tr>
<tr><td>Gamma</td><td>&#x0393;</td><td>U+0393</td></tr>
<tr><td>Delta</td><td>&#x0394;</td><td>U+0394</td></tr>
<tr><td>Epsilon</td><td>&#x0395;</td><td>U+0395</td></tr>
<tr><td>Zeta</td><td>&#x0396;</td><td>U+0396</td></tr>
<tr><td>Eta</td><td>&#x0397;</td><td>U+0397</td></tr>
<tr><td>Theta</td><td>&#x0398;</td><td>U+0398</td></tr>
<tr><td>Iota</td><td>&#x0399;</td><td>U+0399</td></tr>
<tr><td>Kappa</td><td>&#x039A;</td><td>U+039A</td></tr>
<tr><td>Lambda</td><td>&#x039B;</td><td>U+039B</td></tr>
<tr><td>Mu</td><td>&#x039C;</td><td>U+039C</td></tr>
<tr><td>Nu</td><td>&#x039D;</td><td>U+039D</td></tr>
<tr><td>Xi</td><td>&#x039E;</td><td>U+039E</td></tr>
<tr><td>Omicron</td><td>&#x039F;</td><td>U+039F</td></tr>
<tr><td>Pi</td><td>&#x03A0;</td><td>U+03A0</td></tr>
<tr><td>Rho</td><td>&#x03A1;</td><td>U+03A1</td></tr>
<tr><td>Sigma</td><td>&#x03A3;</td><td>U+03A3</td></tr>
<tr><td>Tau</td><td>&#x03A4;</td><td>U+03A4</td></tr>
<tr><td>Upsilon</td><td>&#x03A5;</td><td>U+03A5</td></tr>
<tr><td>Phi</td><td>&#x03A6;</td><td>U+03A6</td></tr>
<tr><td>Chi</td><td>&#x03A7;</td><td>U+03A7</td></tr>
<tr><td>Psi</td><td>&#x03A8;</td><td>U+03A8</td></tr>
<tr><td>Omega</td><td>&#x03A9;</td><td>U+03A9</td></tr>
<tr><td>alpha</td><td>&#x03B1;</td><td>U+03B1</td></tr>
<tr><td>beta</td><td>&#x03B2;</td><td>U+03B2</td></tr>
<tr><td>gamma</td><td>&#x03B3;</td><td>U+03B3</td></tr>
<tr><td>delta</td><td>&#x03B4;</td><td>U+03B4</td></tr>
<tr><td>epsilon</td><td>&#x03B5;</td><td>U+03B5</td></tr>
<tr><td>zeta</td><td>&#x03B6;</td><td>U+03B6</td></tr>
<tr><td>eta</td><td>&#x03B7;</td><td>U+03B7</td></tr>
<tr><td>theta</td><td>&#x03B8;</td><td>U+03B8</td></tr>
<tr><td>iota</td><td>&#x03B9;</td><td>U+03B9</td></tr>
<tr><td>kappa</td><td>&#x03BA;</td><td>U+03BA</td></tr>
<tr><td>lambda</td><td>&#x03BB;</td><td>U+03BB</td></tr>
<tr><td>mu</td><td>&#x03BC;</td><td>U+03BC</td></tr>
<tr><td>nu</td><td>&#x03BD;</td><td>U+03BD</td></tr>
<tr><td>xi</td><td>&#x03BE;</td><td>U+03BE</td></tr>
<tr><td>omicron</td><td>&#x03BF;</td><td>U+03BF</td></tr>
<tr><td>pi</td><td>&#x03C0;</td><td>U+03C0</td></tr>
<tr><td>rho</td><td>&#x03C1;</td><td>U+03C1</td></tr>
<tr><td>sigmaf</td><td>&#x03C2;</td><td>U+03C2</td></tr>
<tr><td>sigma</td><td>&#x03C3;</td><td>U+03C3</td></tr>
<tr><td>tau</td><td>&#x03C4;</td><td>U+03C4</td></tr>
<tr><td>upsilon</td><td>&#x03C5;</td><td>U+03C5</td></tr>
<tr><td>phi</td><td>&#x03C6;</td><td>U+03C6</td></tr>
<tr><td>chi</td><td>&#x03C7;</td><td>U+03C7</td></tr>
<tr><td>psi</td><td>&#x03C8;</td><td>U+03C8</td></tr>
<tr><td>omega</td><td>&#x03C9;</td><td>U+03C9</td></tr>
<tr><td>thetasym</td><td>&#x03D1;</td><td>U+03D1</td></tr>
<tr><td>upsih</td><td>&#x03D2;</td><td>U+03D2</td></tr>
<tr><td>piv</td><td>&#x03D6;</td><td>U+03D6</td></tr>
</table>
var xml = require('xmldom');
var fs = require('fs');
/**
 * Reads an XML file, strips whitespace text nodes from its first top-level
 * node, and prints the result to stdout.
 *
 * @param {string} outputType - 'json' prints a JSON dump of the first
 *   top-level node; any other value prints the re-serialized XML document.
 * @param {string} fileName - Path of the XML file to read.
 */
function main(outputType, fileName) {
  var source = fs.readFileSync(fileName).toString();
  var domParser = new xml.DOMParser();
  var serializer = new xml.XMLSerializer();
  var doc = domParser.parseFromString(source);

  // Cleans the tree in place; the return value is not needed here.
  wsstrip(doc.firstChild);

  if (outputType !== 'json') {
    console.log(serializer.serializeToString(doc));
    return;
  }

  var tree = toJson(doc.firstChild);
  console.log(JSON.stringify(tree, null, 2));
}
/**
 * Recursively removes whitespace-only text nodes from the subtree rooted at
 * `node`. Descendants are modified in place.
 *
 * @param {Object} node - DOM node exposing `nodeType`, `TEXT_NODE`,
 *   `nodeValue`, and (for containers) `childNodes` / `removeChild`.
 * @returns {?Object} `null` when `node` itself is a text node made up only of
 *   whitespace (the caller is responsible for detaching it); otherwise `node`.
 */
function wsstrip(node) {
  // Anchored on both ends so only text consisting entirely of whitespace
  // matches. An unanchored pattern would also match text that merely
  // contains a space (e.g. "Hello world") and cause real content to be dropped.
  var whitespaceOnly = /^[ \t\r\n]+$/;

  if (node.nodeType === node.TEXT_NODE && whitespaceOnly.test(node.nodeValue)) {
    return null;
  }

  if (node.childNodes) {
    // Collect first and remove afterwards: removing during the scan would
    // shift the indices of the remaining children and skip siblings.
    var toRemove = [];
    for (var i = 0; i < node.childNodes.length; i++) {
      var child = node.childNodes[i];
      if (!wsstrip(child)) {
        toRemove.push(child);
      }
    }
    for (var j = 0; j < toRemove.length; j++) {
      node.removeChild(toRemove[j]);
    }
  }

  return node;
}
/**
 * Builds a plain-object snapshot of a DOM node and, recursively, of all its
 * child nodes, suitable for JSON.stringify.
 *
 * @param {Object} node - DOM node to convert.
 * @returns {{tagName: *, nodeName: *, nodeType: *, nodeValue: *, data: *,
 *   children: Array}} Selected node properties copied as-is, plus the
 *   converted child nodes in document order.
 */
function toJson(node) {
  var kids = node.childNodes;
  // Nodes without a child list are given an empty children array.
  var children = kids
    ? Array.from(kids, function (kid) {
        return toJson(kid);
      })
    : [];

  var snapshot = {};
  snapshot.tagName = node.tagName;
  snapshot.nodeName = node.nodeName;
  snapshot.nodeType = node.nodeType;
  snapshot.nodeValue = node.nodeValue;
  snapshot.data = node.data;
  snapshot.children = children;
  return snapshot;
}
// Run main only when this file is executed directly (not when it is required).
// The first two command-line arguments are the output type and the file name.
if (module === require.main) {
  var cliArgs = process.argv.slice(2);
  main(cliArgs[0], cliArgs[1]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment