Created
March 2, 2012 22:38
-
-
Save dmh2000/1961957 to your computer and use it in GitHub Desktop.
How to use AntiSamy-java from a Node.js application to sanitize HTML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var java = require('java');
var util = require('util');

// -------------------------------------------------
// How to use AntiSamy from Node to sanitize HTML
// -------------------------------------------------
// Sanitizing HTML input is a science in itself, so it is better not to
// reinvent the wheel.
//
// The AntiSamy main website is
//   https://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project
// This example uses the Java version of AntiSamy together with the node-java
// bridge from https://github.com/nearinfinity/node-java (thanks, JoeFerner).
//
// The 'Sync' variants of all node-java calls are used for simplicity. As far
// as the author could tell, the only I/O happens while loading the policy and
// creating the Policy and AntiSamy instances, so the scan itself can be
// synchronous. If desired, load the policy and the AntiSamy instance
// asynchronously instead.
//
// To download and install AntiSamy, go to
//   http://code.google.com/p/owaspantisamy/downloads/list
// then download the Develope_Guide.pdf and follow its instructions.

// Jar files required on the Java classpath (most recent versions at the time
// of writing). Paths are relative to the working directory of the process.
java.classpath.push("commons-lang3-3.1.jar");
java.classpath.push('batik-css.jar');      // from apache.org > batik-1.7/lib/batik-css.jar
java.classpath.push('xercesImpl.jar');     // from apache.org > xerces-2_11_0/xercesImpl.jar
java.classpath.push('xml-apis.jar');       // from apache.org > xerces-2_11_0/xml-apis.jar
java.classpath.push('antisamy-1.4.4.jar'); // from the antisamy download
java.classpath.push('nekohtml.jar');       // from http://sourceforge.net/projects/nekohtml/

// The string to scan.
var s = "<p><script>alert();</script></p><span></span>";

// Get a Policy object loaded with the desired policy file.
var policy = java.callStaticMethodSync("org.owasp.validator.html.Policy", "getInstance", "antisamy-slashdot-1.4.4.xml");

// Get an instance of the AntiSamy class.
var as = java.newInstanceSync("org.owasp.validator.html.AntiSamy");

// Scan the string against the policy.
var cr = as.scanSync(s, policy);

// Get the sanitized HTML.
var html = cr.getCleanHTMLSync();

// Get the Java list of messages generated by the scan, and its length.
// The list is a Java object, so it is accessed through the bridge's
// sizeSync()/getSync() methods rather than as a JavaScript array.
var elist = cr.getErrorMessagesSync();
var len = elist.sizeSync();

// Display the sanitized HTML.
console.log(html);

// Display the scan messages, one per line.
for (var i = 0; i < len; ++i) {
  console.log(elist.getSync(i));
}

/* Output for the string "<p><script>alert();</script></p><span></span>"
<p />
The script tag is not allowed for security reasons. This tag should not affect the display of the input.
The span tag was empty, and therefore we could not process it. The rest of the message is intact, and its removal should not have any side effects.
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment