Created
December 19, 2011 23:43
-
-
Save tomerd/1499453 to your computer and use it in GitHub Desktop.
a less simple HtmlSanitizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public final class LessSimpleHtmlSanitizer implements HtmlSanitizer | |
{ | |
private static final String[] SIMPLE_TAGS = {"b", "em", "i", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "br", "ul", "ol", "li", "img"}; | |
private static final String[] CONTAINER_TAGS = {"html", "body", "table", "tbody", "tfoot", "th", "tr", "td", "p", "div", "span", "pre"}; | |
public SafeHtml sanitize(String html) | |
{ | |
return sanitizeHtml(html); | |
} | |
public static SafeHtml sanitizeHtml(String html) | |
{ | |
if (html == null) throw new NullPointerException("html is null"); | |
return SafeHtmlUtils.fromTrustedString(sanitizeString(html)); | |
} | |
private static String sanitizeString(String text) | |
{ | |
if (null == text || 0 == text.length()) return null; | |
StringBuilder builder = new StringBuilder(); | |
boolean firstSegment = true; | |
for (String segment : text.split("<", -1)) | |
{ | |
if (firstSegment) | |
{ | |
firstSegment = false; | |
if (segment.length() > 0) builder.append(SafeHtmlUtils.htmlEscapeAllowEntities(segment)); | |
continue; | |
} | |
int tagStart = ('/' == segment.charAt(0)) ? 1 : 0; | |
boolean endTag = 1 == tagStart; | |
int tagEnd = segment.indexOf('>'); | |
boolean selfClosing = '/' == segment.charAt(tagEnd-1); | |
if (selfClosing) tagEnd--; | |
int attributesStart = segment.substring(tagStart, tagEnd).indexOf(' '); | |
String tag = segment.substring(tagStart, attributesStart > 0 ? attributesStart : tagEnd).toLowerCase(); | |
// FIXME: need to sanitize attributes as well (onclick, etc can be exploited) | |
String attributes = attributesStart > 0 ? segment.substring(attributesStart+1, tagEnd) : null; | |
boolean valid = ArrayUtil.indexOf(SIMPLE_TAGS, tag) >= 0 || ArrayUtil.indexOf(CONTAINER_TAGS, tag) >= 0; | |
if (!valid) | |
{ | |
// escape it | |
builder.append("<").append(SafeHtmlUtils.htmlEscapeAllowEntities(segment)); | |
continue; | |
} | |
if (endTag) | |
{ | |
// close tag | |
builder.append("</").append(tag).append('>'); | |
} | |
else if (selfClosing) | |
{ | |
// self closing tag | |
builder.append('<').append(tag); | |
if (null != attributes) builder.append(' ').append(attributes); | |
builder.append("/>"); | |
String leftover = segment.substring(tagEnd + 2); | |
if (leftover.length() > 0) builder.append(SafeHtmlUtils.htmlEscapeAllowEntities(leftover)); | |
} | |
else | |
{ | |
// continue building | |
builder.append('<').append(tag); | |
if (null != attributes) builder.append(' ').append(attributes); | |
builder.append('>'); | |
String content = segment.substring(tagEnd + 1); | |
if (content.length() > 0) builder.append(sanitizeString(content)); | |
} | |
} | |
return builder.toString(); | |
} | |
private LessSimpleHtmlSanitizer() | |
{ | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment