Skip to content

Instantly share code, notes, and snippets.

@spullara
Created August 5, 2010 17:31
Show Gist options
  • Select an option

  • Save spullara/510063 to your computer and use it in GitHub Desktop.

Select an option

Save spullara/510063 to your computer and use it in GitHub Desktop.
package bagcheck.util;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Cleans up user-supplied text so that it can be embedded in an HTML page.
 *
 * Only a small whitelist of formatting tags and plain http(s) links survives;
 * every other tag is removed and every other {@code <} is escaped.
 *
 * User: sam
 * Date: Aug 5, 2010
 * Time: 9:25:02 AM
 */
public class Text {
    /** Anything that is shaped like a tag: {@code <...>} with no nested angle brackets. */
    private static final Pattern TAG = Pattern.compile("<(/?[^><]+?)>");

    /** Attribute-free tags (opening or closing form) that are passed through. */
    private static final Pattern ALLOWED_TAGS =
            Pattern.compile("/?(br|p|a|b|i|ol|li|ul|blockquote)");

    /** The only anchor form accepted: a single double-quoted href attribute. */
    private static final Pattern ALLOWED_A_TAG = Pattern.compile("a\\s+href=\"([^\"]+?)\"");

    /** Link targets are limited to http(s) URLs made of a conservative character set. */
    private static final Pattern ALLOWED_HREF = Pattern.compile("https?://[a-zA-Z0-9-_./]+");

    /**
     * Sanitizes {@code input} for inclusion in HTML.
     *
     * Processing steps:
     * - every {@code &} is escaped to {@code &amp;};
     * - whitelisted tags are kept, accepted links are re-emitted as
     *   {@code <a href="..." rel="nofollow">}, all other tags are removed;
     * - every {@code <} that is not part of an emitted tag is escaped to {@code &lt;};
     * - a closing tag is kept only when it matches the innermost open tag, and tags
     *   still open at the end of the input are closed, so the result is balanced;
     * - finally each newline is replaced by {@code <br>}.
     *
     * A stray {@code >} in text is left as-is.
     *
     * @param input the raw text; must not be null
     * @return the sanitized HTML fragment
     * @throws NullPointerException if {@code input} is null
     */
    public static String strip(String input) {
        String escaped = input.replace("&", "&amp;");
        // Innermost open tag is at the head of the deque.
        Deque<String> openTags = new ArrayDeque<String>();
        StringBuilder out = new StringBuilder(escaped.length() + 16);

        Matcher tagMatcher = TAG.matcher(escaped);
        int textStart = 0;
        while (tagMatcher.find()) {
            appendText(out, escaped, textStart, tagMatcher.start());
            textStart = tagMatcher.end();
            appendTag(out, openTags, tagMatcher.group(1));
        }
        appendText(out, escaped, textStart, escaped.length());

        // Close whatever the author left open so the markup cannot bleed into the page.
        while (!openTags.isEmpty()) {
            out.append("</").append(openTags.pop()).append('>');
        }
        return out.toString().replace("\n", "<br>");
    }

    /**
     * Appends {@code text[from, to)} with every {@code <} escaped. Text between tags must
     * never contribute a raw {@code <}: once a disallowed tag next to it is removed, a
     * leftover {@code <} could otherwise fuse with the following text into a new tag.
     */
    private static void appendText(StringBuilder out, String text, int from, int to) {
        if (from < to) {
            out.append(text.substring(from, to).replace("<", "&lt;"));
        }
    }

    /**
     * Emits {@code tag} (the text between the angle brackets) if it is allowed and keeps
     * {@code openTags} in sync; disallowed or mismatched tags produce no output.
     */
    private static void appendTag(StringBuilder out, Deque<String> openTags, String tag) {
        if (ALLOWED_TAGS.matcher(tag).matches()) {
            if (tag.startsWith("/")) {
                // Only honour a closing tag that closes the innermost open tag; peek()
                // yields null on an empty deque, which never equals a tag name.
                if (tag.substring(1).equals(openTags.peek())) {
                    openTags.pop();
                    out.append('<').append(tag).append('>');
                }
            } else {
                // <br> is a void element and is never expected to be closed.
                if (!tag.equals("br")) {
                    openTags.push(tag);
                }
                out.append('<').append(tag).append('>');
            }
            return;
        }

        Matcher anchor = ALLOWED_A_TAG.matcher(tag);
        if (anchor.matches() && ALLOWED_HREF.matcher(anchor.group(1)).matches()) {
            // Rebuild the tag from the validated href instead of echoing the input, so
            // whitespace such as a newline inside the tag cannot corrupt the output.
            out.append("<a href=\"").append(anchor.group(1)).append("\" rel=\"nofollow\">");
            openTags.push("a");
        }
    }

    public static void main(String[] args) {
        System.out.println(strip("<a href=\"http://lessthan10.com\">1 < 10</a>"));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment